diff --git a/src/client_min_pk.c b/src/client_min_pk.c index 3f04fb65..ff4e877f 100644 --- a/src/client_min_pk.c +++ b/src/client_min_pk.c @@ -6,11 +6,12 @@ #include "keygen.c" #include "e2.c" -#include "exp2.c" #include "hash_to_field.c" #include "map_to_g2.c" #include "e1.c" #include "exp.c" +#include "recip.c" +#include "sqrt.c" #include "consts.c" #include "vect.c" #include "exports.c" diff --git a/src/client_min_sig.c b/src/client_min_sig.c index ab752ce1..a36962e8 100644 --- a/src/client_min_sig.c +++ b/src/client_min_sig.c @@ -6,11 +6,12 @@ #include "keygen.c" #include "e1.c" -#include "exp.c" #include "hash_to_field.c" #include "map_to_g1.c" #include "e2.c" -#include "exp2.c" +#include "exp.c" +#include "recip.c" +#include "sqrt.c" #include "consts.c" #include "vect.c" #include "exports.c" diff --git a/src/exp.c b/src/exp.c index e702e35a..55c5c5a7 100644 --- a/src/exp.c +++ b/src/exp.c @@ -5,19 +5,6 @@ */ #include "vect.h" -#include "fields.h" - -static void reciprocal_fp(vec384 out, const vec384 inp) -{ - vec768 temp; - - ct_inverse_mod_383(temp, inp, BLS12_381_P); - redc_mont_384(out, temp, BLS12_381_P, p0); - mul_mont_384(out, out, BLS12_381_RR, BLS12_381_P, p0); -} - -void blst_fp_inverse(vec384 out, const vec384 inp) -{ reciprocal_fp(out, inp); } /* * |out| = |inp|^|pow|, small footprint, public exponent @@ -52,85 +39,17 @@ static void exp_mont_384(vec384 out, const vec384 inp, const byte *pow, #endif } -#ifdef __OPTIMIZE_SIZE__ -static void recip_sqrt_fp_3mod4(vec384 out, const vec384 inp) +static void exp_mont_384x(vec384x out, const vec384x inp, const byte *pow, + size_t pow_bits, const vec384 p, limb_t n0) { - static const byte BLS_12_381_P_minus_3_div_4[] = { - TO_BYTES(0xee7fbfffffffeaaa), TO_BYTES(0x07aaffffac54ffff), - TO_BYTES(0xd9cc34a83dac3d89), TO_BYTES(0xd91dd2e13ce144af), - TO_BYTES(0x92c6e9ed90d2eb35), TO_BYTES(0x0680447a8e5ff9a6) - }; + vec384x ret; - exp_mont_384(out, inp, BLS_12_381_P_minus_3_div_4, 379, BLS12_381_P, p0); -} -#else -# if 1 -/* - * "383"-bit variant omits full reductions at the ends of squarings, - * which results in up to ~15% improvement. [One can improve further - * by omitting full reductions even after multiplications and - * performing final reduction at the very end of the chain.] - */ -static inline void sqr_n_mul_fp(vec384 out, const vec384 a, size_t count, - const vec384 b) -{ sqr_n_mul_mont_383(out, a, count, BLS12_381_P, p0, b); } -# else -static void sqr_n_mul_fp(vec384 out, const vec384 a, size_t count, - const vec384 b) -{ - while(count--) { - sqr_fp(out, a); - a = out; + vec_copy(ret, inp, sizeof(ret)); /* |ret| = |inp|^1 */ + --pow_bits; /* most significant bit is accounted for, skip over */ + while (pow_bits--) { + sqr_mont_384x(ret, ret, p, n0); + if (is_bit_set(pow, pow_bits)) + mul_mont_384x(ret, ret, inp, p, n0); } - mul_fp(out, out, b); -} -# endif - -# define sqr(ret,a) sqr_fp(ret,a) -# define mul(ret,a,b) mul_fp(ret,a,b) -# define sqr_n_mul(ret,a,n,b) sqr_n_mul_fp(ret,a,n,b) - -# include "sqrt-addchain.h" -static void recip_sqrt_fp_3mod4(vec384 out, const vec384 inp) -{ - RECIP_SQRT_MOD_BLS12_381_P(out, inp, vec384); + vec_copy(out, ret, sizeof(ret)); /* |out| = |ret| */ } -# undef RECIP_SQRT_MOD_BLS12_381_P - -# undef sqr_n_mul -# undef sqr -# undef mul -#endif - -static bool_t recip_sqrt_fp(vec384 out, const vec384 inp) -{ - vec384 t0, t1; - bool_t ret; - - recip_sqrt_fp_3mod4(t0, inp); - - mul_fp(t1, t0, inp); - sqr_fp(t1, t1); - ret = vec_is_equal(t1, inp, sizeof(t1)); - vec_copy(out, t0, sizeof(t0)); - - return ret; -} - -static bool_t sqrt_fp(vec384 out, const vec384 inp) -{ - vec384 t0, t1; - bool_t ret; - - recip_sqrt_fp_3mod4(t0, inp); - - mul_fp(t0, t0, inp); - sqr_fp(t1, t0); - ret = vec_is_equal(t1, inp, sizeof(t1)); - vec_copy(out, t0, sizeof(t0)); - - return ret; -} - -int blst_fp_sqrt(vec384 out, const vec384 inp) -{ return (int)sqrt_fp(out, inp); } diff --git a/src/exp2.c b/src/exp2.c deleted file mode 100644 index 09245f83..00000000 --- a/src/exp2.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright Supranational LLC - * Licensed under the Apache License, Version 2.0, see LICENSE for details. - * SPDX-License-Identifier: Apache-2.0 - */ - -#include "vect.h" -#include "fields.h" - -static void reciprocal_fp2(vec384x out, const vec384x inp) -{ - vec384 t0, t1; - - /* - * |out| = 1/(a + b*i) = a/(a^2+b^2) - b/(a^2+b^2)*i - */ - sqr_fp(t0, inp[0]); - sqr_fp(t1, inp[1]); - add_fp(t0, t0, t1); - reciprocal_fp(t1, t0); - mul_fp(out[0], inp[0], t1); - mul_fp(out[1], inp[1], t1); - neg_fp(out[1], out[1]); -} - -void blst_fp2_inverse(vec384x out, const vec384x inp) -{ reciprocal_fp2(out, inp); } - -void blst_fp2_eucl_inverse(vec384x out, const vec384x inp) -{ reciprocal_fp2(out, inp); } - -/* - * |out| = |inp|^|pow|, small footprint, public exponent - */ -static void exp_mont_384x(vec384x out, const vec384x inp, const byte *pow, - size_t pow_bits, const vec384 p, limb_t n0) -{ - vec384x ret; - - vec_copy(ret, inp, sizeof(ret)); /* |ret| = |inp|^1 */ - --pow_bits; /* most significant bit is accounted for, skip over */ - while (pow_bits--) { - sqr_mont_384x(ret, ret, p, n0); - if (is_bit_set(pow, pow_bits)) - mul_mont_384x(ret, ret, inp, p, n0); - } - vec_copy(out, ret, sizeof(ret)); /* |out| = |ret| */ -} - -#ifdef __OPTIMIZE_SIZE__ -static void recip_sqrt_fp2_9mod16(vec384x out, const vec384x inp) -{ - static const byte BLS_12_381_P_2_minus_9_div_16[] = { - TO_BYTES(0xb26aa00001c718e3), TO_BYTES(0xd7ced6b1d76382ea), - TO_BYTES(0x3162c338362113cf), TO_BYTES(0x966bf91ed3e71b74), - TO_BYTES(0xb292e85a87091a04), TO_BYTES(0x11d68619c86185c7), - TO_BYTES(0xef53149330978ef0), TO_BYTES(0x050a62cfd16ddca6), - TO_BYTES(0x466e59e49349e8bd), TO_BYTES(0x9e2dc90e50e7046b), - TO_BYTES(0x74bd278eaa22f25e), TO_BYTES(0x002a437a4b8c35fc) - }; - - exp_mont_384x(out, inp, BLS_12_381_P_2_minus_9_div_16, 758, - BLS12_381_P, p0); -} -#else -static void sqr_n_mul_fp2(vec384x out, const vec384x a, size_t count, - const vec384x b) -{ - while(count--) { - sqr_mont_382x(out, a, BLS12_381_P, p0); - a = out; - } - mul_mont_384x(out, out, b, BLS12_381_P, p0); -} - -# define sqr(ret,a) sqr_fp2(ret,a) -# define mul(ret,a,b) mul_fp2(ret,a,b) -# define sqr_n_mul(ret,a,n,b) sqr_n_mul_fp2(ret,a,n,b) - -# include "sqrt2-addchain.h" -static void recip_sqrt_fp2_9mod16(vec384x out, const vec384x inp) -{ - RECIP_SQRT_MOD_BLS12_381_P2(out, inp, vec384x); -} -# undef RECIP_SQRT_MOD_BLS12_381_P2 - -# undef sqr_n_mul -# undef sqr -# undef mul -#endif - -static bool_t sqrt_align_fp2(vec384x out, const vec384x ret, - const vec384x sqrt, const vec384x inp) -{ - static const vec384x sqrt_minus_1 = { { 0 }, { ONE_MONT_P } }; - static const vec384x sqrt_sqrt_minus_1 = { - /* - * "magic" number is ±2^((p-3)/4)%p, which is "1/sqrt(2)", - * in quotes because 2*"1/sqrt(2)"^2 == -1 mod p, not 1, - * but it pivots into "complex" plane nevertheless... - */ - { TO_LIMB_T(0x3e2f585da55c9ad1), TO_LIMB_T(0x4294213d86c18183), - TO_LIMB_T(0x382844c88b623732), TO_LIMB_T(0x92ad2afd19103e18), - TO_LIMB_T(0x1d794e4fac7cf0b9), TO_LIMB_T(0x0bd592fc7d825ec8) }, - { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c), - TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7), - TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) } - }; - static const vec384x sqrt_minus_sqrt_minus_1 = { - { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c), - TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7), - TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) }, - { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c), - TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7), - TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) } - }; - vec384x coeff, t0, t1; - bool_t is_sqrt, flag; - - /* - * Instead of multiple trial squarings we can perform just one - * and see if the result is "rotated by multiple of 90°" in - * relation to |inp|, and "rotate" |ret| accordingly. - */ - sqr_fp2(t0, sqrt); - /* "sqrt(|inp|)"^2 = (a + b*i)^2 = (a^2-b^2) + 2ab*i */ - - /* (a^2-b^2) + 2ab*i == |inp| ? |ret| is spot on */ - sub_fp2(t1, t0, inp); - is_sqrt = vec_is_zero(t1, sizeof(t1)); - vec_copy(coeff, BLS12_381_Rx.p2, sizeof(coeff)); - - /* -(a^2-b^2) - 2ab*i == |inp| ? "rotate |ret| by 90°" */ - add_fp2(t1, t0, inp); - vec_select(coeff, sqrt_minus_1, coeff, sizeof(coeff), - flag = vec_is_zero(t1, sizeof(t1))); - is_sqrt |= flag; - - /* 2ab - (a^2-b^2)*i == |inp| ? "rotate |ret| by 135°" */ - sub_fp(t1[0], t0[0], inp[1]); - add_fp(t1[1], t0[1], inp[0]); - vec_select(coeff, sqrt_sqrt_minus_1, coeff, sizeof(coeff), - flag = vec_is_zero(t1, sizeof(t1))); - is_sqrt |= flag; - - /* -2ab + (a^2-b^2)*i == |inp| ? "rotate |ret| by 45°" */ - add_fp(t1[0], t0[0], inp[1]); - sub_fp(t1[1], t0[1], inp[0]); - vec_select(coeff, sqrt_minus_sqrt_minus_1, coeff, sizeof(coeff), - flag = vec_is_zero(t1, sizeof(t1))); - is_sqrt |= flag; - - /* actual "rotation" */ - mul_fp2(out, ret, coeff); - - return is_sqrt; -} - -static bool_t recip_sqrt_fp2(vec384x out, const vec384x inp) -{ - vec384x ret, sqrt; - - recip_sqrt_fp2_9mod16(ret, inp); - mul_fp2(sqrt, ret, inp); - - /* - * Now see if |ret| is or can be made 1/sqrt(|inp|)... - */ - - return sqrt_align_fp2(out, ret, sqrt, inp); -} - -static bool_t sqrt_fp2(vec384x out, const vec384x inp) -{ - vec384x ret; - - recip_sqrt_fp2_9mod16(ret, inp); - mul_fp2(ret, ret, inp); - - /* - * Now see if |ret| is or can be made sqrt(|inp|)... - */ - - return sqrt_align_fp2(out, ret, ret, inp); -} - -int blst_fp2_sqrt(vec384x out, const vec384x inp) -{ return (int)sqrt_fp2(out, inp); } diff --git a/src/exports.c b/src/exports.c index 8eb62dff..3e353492 100644 --- a/src/exports.c +++ b/src/exports.c @@ -80,9 +80,6 @@ void blst_fp_sqr(vec384 ret, const vec384 a) void blst_fp_cneg(vec384 ret, const vec384 a, int flag) { cneg_fp(ret, a, is_zero(flag) ^ 1); } -void blst_fp_eucl_inverse(vec384 ret, const vec384 a) -{ reciprocal_fp(ret, a); } - void blst_fp_to(vec384 ret, const vec384 a) { mul_fp(ret, a, BLS12_381_RR); } diff --git a/src/fields.h b/src/fields.h index ebb26fd0..b64e7d5d 100644 --- a/src/fields.h +++ b/src/fields.h @@ -84,6 +84,17 @@ static inline void cneg_fp2(vec384x ret, const vec384x a, bool_t flag) #define vec_load_global vec_copy +static void reciprocal_fp(vec384 out, const vec384 inp); +static bool_t recip_sqrt_fp(vec384 out, const vec384 inp); +static bool_t sqrt_fp(vec384 out, const vec384 inp); + +static void reciprocal_fp2(vec384x out, const vec384x inp); +static bool_t recip_sqrt_fp2(vec384x out, const vec384x inp, + const vec384x recip_ZZZ, const vec384x magic_ZZZ); +static bool_t sqrt_fp2(vec384x out, const vec384x inp); +static bool_t sqrt_align_fp2(vec384x out, const vec384x ret, + const vec384x sqrt, const vec384x inp); + typedef vec384x vec384fp2; typedef vec384fp2 vec384fp6[3]; typedef vec384fp6 vec384fp12[2]; diff --git a/src/map_to_g2.c b/src/map_to_g2.c index 8e45a773..a7e06a39 100644 --- a/src/map_to_g2.c +++ b/src/map_to_g2.c @@ -186,14 +186,6 @@ static void map_to_isogenous_E2(POINTonE2 *p, const vec384x u) TO_LIMB_T(0x07e83a49a2e99d69), TO_LIMB_T(0xeca8f3318332bb7a), TO_LIMB_T(0xef148d1ea0f4c069), TO_LIMB_T(0x040ab3263eff0206) } }; - static const vec384x sqrt_ZZZ = { /* (Z^3)^((P^2+7)/16) */ - { TO_LIMB_T(0x019af5f980a3680c), TO_LIMB_T(0x4ed7da0e66063afa), - TO_LIMB_T(0x600354723b5d9972), TO_LIMB_T(0x8b2f958b20d09d72), - TO_LIMB_T(0x0474938f02d461db), TO_LIMB_T(0x0dcf8b9e0684ab1c) }, - { TO_LIMB_T(0x486f252db11dd19c), TO_LIMB_T(0x791ffda2c3d18950), - TO_LIMB_T(0x5af6c27debf95eb4), TO_LIMB_T(0x73b1fd8f2a929cde), - TO_LIMB_T(0xfc59602a1a90b871), TO_LIMB_T(0x08d7daafa8baddb3) } - }; static const vec384x recip_ZZZ = { /* 1/(Z^3) */ { TO_LIMB_T(0x65018f5c28f598eb), TO_LIMB_T(0xe6020417f022d916), TO_LIMB_T(0xd6327313288369c7), TO_LIMB_T(0x622ded8eb447156f), @@ -202,6 +194,16 @@ static void map_to_isogenous_E2(POINTonE2 *p, const vec384x u) TO_LIMB_T(0x67e495a909e7a18e), TO_LIMB_T(0xdf2da23b8145b8f7), TO_LIMB_T(0xcf5d3728310ebf6d), TO_LIMB_T(0x11be446236f4c116) } }; + static const vec384x magic_ZZZ = { /* 1/Z^3 = a + b*i */ + /* a^2 + b^2 */ + { TO_LIMB_T(0xaa7eb851eb8508e0), TO_LIMB_T(0x1c54fdf360989374), + TO_LIMB_T(0xc87f2fc6e716c62e), TO_LIMB_T(0x0124aefb1f9efea7), + TO_LIMB_T(0xb2f8be63e844865c), TO_LIMB_T(0x08b47f775a7ef35a) }, + /* (a^2 + b^2)^((P-3)/4) */ + { TO_LIMB_T(0xe4132bbd838cf70a), TO_LIMB_T(0x01d769ac83772c19), + TO_LIMB_T(0xa83dd6e974c22e45), TO_LIMB_T(0xbc8ec3e777b08dff), + TO_LIMB_T(0xc035c2042ecf5da3), TO_LIMB_T(0x073929e97f0850bf) } + }; static const vec384x ZxA = { /* 240 - 480*i */ { TO_LIMB_T(0xe53a000003135242), TO_LIMB_T(0x01080c0fdef80285), TO_LIMB_T(0xe7889edbe340f6bd), TO_LIMB_T(0x0b51375126310601), @@ -210,7 +212,7 @@ static void map_to_isogenous_E2(POINTonE2 *p, const vec384x u) TO_LIMB_T(0xff50678a26dffece), TO_LIMB_T(0xb24c28679aa8197a), TO_LIMB_T(0x908a1ebe5708d058), TO_LIMB_T(0x0fc0ba017f2b2466) } }; - vec384x uu, tv2, tv3, tv4, x2n, gx1, gxd, y2; + vec384x uu, tv2, tv4, x2n, gx1, gxd, y2; #if 0 vec384x xn, x1n, xd, y, y1, Zuu; #else @@ -255,14 +257,10 @@ static void map_to_isogenous_E2(POINTonE2 *p, const vec384x u) sqr_fp2(tv4, gxd); /* tv4 = gxd^2 */ mul_fp2(tv2, gx1, gxd); /* tv2 = gx1 * gxd */ mul_fp2(tv4, tv4, tv2); /* tv4 = tv4 * tv2 # gx1*gxd^3 */ - e2 = recip_sqrt_fp2(y1, tv4); /* y1 = tv4^c1 # (gx1*gxd^3)^((p^2-9)/16) */ - mul_fp2(y2, y1, sqrt_ZZZ); /* y2 = y1 * c2 # y2 = y1*sqrt(Z^3) */ - mul_fp2(tv4, tv4, recip_ZZZ); - mul_fp2(tv3, y2, tv4); - (void)sqrt_align_fp2(y2, y2, tv3, tv4); + e2 = recip_sqrt_fp2(y1, tv4, /* y1 = tv4^c1 # (gx1*gxd^3)^((p^2-9)/16) */ + recip_ZZZ, magic_ZZZ); mul_fp2(y1, y1, tv2); /* y1 = y1 * tv2 # gx1*gxd*y1 */ - mul_fp2(y2, y2, tv2); /* y2 = y2 * tv2 # gx1*gxd*y2 */ - mul_fp2(y2, y2, uu); /* y2 = y2 * uu */ + mul_fp2(y2, y1, uu); /* y2 = y1 * uu */ mul_fp2(y2, y2, u); /* y2 = y2 * u */ /* choose numerators */ diff --git a/src/recip.c b/src/recip.c new file mode 100644 index 00000000..dbcc9425 --- /dev/null +++ b/src/recip.c @@ -0,0 +1,44 @@ +/* + * Copyright Supranational LLC + * Licensed under the Apache License, Version 2.0, see LICENSE for details. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "fields.h" + +static void reciprocal_fp(vec384 out, const vec384 inp) +{ + vec768 temp; + + ct_inverse_mod_383(temp, inp, BLS12_381_P); + redc_mont_384(out, temp, BLS12_381_P, p0); + mul_mont_384(out, out, BLS12_381_RR, BLS12_381_P, p0); +} + +void blst_fp_inverse(vec384 out, const vec384 inp) +{ reciprocal_fp(out, inp); } + +void blst_fp_eucl_inverse(vec384 ret, const vec384 a) +{ reciprocal_fp(ret, a); } + +static void reciprocal_fp2(vec384x out, const vec384x inp) +{ + vec384 t0, t1; + + /* + * |out| = 1/(a + b*i) = a/(a^2+b^2) - b/(a^2+b^2)*i + */ + sqr_fp(t0, inp[0]); + sqr_fp(t1, inp[1]); + add_fp(t0, t0, t1); + reciprocal_fp(t1, t0); + mul_fp(out[0], inp[0], t1); + mul_fp(out[1], inp[1], t1); + neg_fp(out[1], out[1]); +} + +void blst_fp2_inverse(vec384x out, const vec384x inp) +{ reciprocal_fp2(out, inp); } + +void blst_fp2_eucl_inverse(vec384x out, const vec384x inp) +{ reciprocal_fp2(out, inp); } diff --git a/src/server.c b/src/server.c index 73938dc7..2d29d99f 100644 --- a/src/server.c +++ b/src/server.c @@ -7,14 +7,15 @@ #include "keygen.c" #include "hash_to_field.c" #include "e1.c" -#include "exp.c" #include "map_to_g1.c" #include "e2.c" -#include "exp2.c" #include "map_to_g2.c" #include "fp12_tower.c" #include "pairing.c" #include "aggregate.c" +#include "exp.c" +#include "recip.c" +#include "sqrt.c" #include "consts.c" #include "vect.c" #include "exports.c" diff --git a/src/vect.h b/src/vect.h index 442446e9..900079fb 100644 --- a/src/vect.h +++ b/src/vect.h @@ -166,17 +166,8 @@ void sub_mod_384x384(vec768 ret, const vec768 a, const vec768 b, */ static void exp_mont_384(vec384 out, const vec384 inp, const byte *pow, size_t pow_bits, const vec384 p, limb_t n0); -static void reciprocal_fp(vec384 out, const vec384 inp); -static bool_t recip_sqrt_fp(vec384 out, const vec384 inp); -static bool_t sqrt_fp(vec384 out, const vec384 inp); - static void exp_mont_384x(vec384x out, const vec384x inp, const byte *pow, size_t pow_bits, const vec384 p, limb_t n0); -static void reciprocal_fp2(vec384x out, const vec384x inp); -static bool_t recip_sqrt_fp2(vec384x out, const vec384x inp); -static bool_t sqrt_fp2(vec384x out, const vec384x inp); -static bool_t sqrt_align_fp2(vec384x out, const vec384x ret, - const vec384x sqrt, const vec384x inp); static void div_by_zz(limb_t val[]); static void div_by_z(limb_t val[]);