Skip to content

Commit

Permalink
Refactor Pornin's algorithm.
Browse files Browse the repository at this point in the history
  • Loading branch information
dfaranha committed May 9, 2024
1 parent a8bea16 commit 43ad9ef
Showing 1 changed file with 185 additions and 169 deletions.
354 changes: 185 additions & 169 deletions src/fp/relic_fp_smb.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,129 @@
/* Private definitions */
/*============================================================================*/

#if FP_SMB == BINAR || !defined(STRIP)
/**
* Approach heavily inpired in the blst implementation of the algorithm.
*/
static dig_t porninstep(dis_t m[4],const dig_t f[2], const dig_t g[2],
dig_t k, size_t s) {
dig_t limbx, ai = 1, bi = 0, ci = 0, di = 1;
dig_t g_lo = g[0], g_hi = g[1], f_lo = f[0], f_hi = f[1];
dig_t t_lo, t_hi, odd, borrow, xorm;

/* Unrolling twice gives some small speedup. */
for (size_t i = 0; i < s; i+=2) {
odd = 0 - (g_lo & 1);

/* g_ -= f_ if g_ is odd */
t_lo = g_lo, t_hi = g_hi;

borrow = 0;
limbx = g_lo - (f_lo & odd);
borrow = (g_lo < limbx);
g_lo = limbx;

limbx = g_hi - (f_hi & odd);
xorm = limbx - borrow;
borrow = -((g_hi < limbx) || (borrow && !limbx));
g_hi = xorm;

k += ((t_lo & f_lo) >> 1) & borrow;

/* negate g_-f_ if it borrowed */
g_lo ^= borrow;
g_hi ^= borrow;
limbx = g_lo + (borrow & 1);
g_hi += (g_lo < limbx);
g_lo = limbx;

/* f_=g_ if g_-f_ borrowed */
f_lo = ((t_lo ^ f_lo) & borrow) ^ f_lo;
f_hi = ((t_hi ^ f_hi) & borrow) ^ f_hi;

/* exchange ai and ci if g_-f_ borrowed */
xorm = (ai ^ ci) & borrow;
ai ^= xorm;
ci ^= xorm;

/* exchange bi and di if g_-f_ borrowed */
xorm = (bi ^ di) & borrow;
bi ^= xorm;
di ^= xorm;

/* subtract if g_ was odd */
ai -= ci & odd;
bi -= di & odd;

ci <<= 1;
di <<= 1;
g_lo >>= 1;
g_lo |= g_hi << (RLC_DIG - 1);
g_hi >>= 1;

k += (f_lo + 2) >> 2;

odd = 0 - (g_lo & 1);

/* g_ -= f_ if g_ is odd */
t_lo = g_lo, t_hi = g_hi;

borrow = 0;
limbx = g_lo - (f_lo & odd);
borrow = (g_lo < limbx);
g_lo = limbx;

limbx = g_hi - (f_hi & odd);
xorm = limbx - borrow;
borrow = -((g_hi < limbx) || (borrow && !limbx));
g_hi = xorm;

k += ((t_lo & f_lo) >> 1) & borrow;

/* negate g_-f_ if it borrowed */
g_lo ^= borrow;
g_hi ^= borrow;
limbx = g_lo + (borrow & 1);
g_hi += (g_lo < limbx);
g_lo = limbx;

/* f_=g_ if g_-f_ borrowed */
f_lo = ((t_lo ^ f_lo) & borrow) ^ f_lo;
f_hi = ((t_hi ^ f_hi) & borrow) ^ f_hi;

/* exchange ai and ci if g_-f_ borrowed */
xorm = (ai ^ ci) & borrow;
ai ^= xorm;
ci ^= xorm;

/* exchange bi and di if g_-f_ borrowed */
xorm = (bi ^ di) & borrow;
bi ^= xorm;
di ^= xorm;

/* subtract if g_ was odd */
ai -= ci & odd;
bi -= di & odd;

ci <<= 1;
di <<= 1;
g_lo >>= 1;
g_lo |= g_hi << (RLC_DIG - 1);
g_hi >>= 1;

k += (f_lo + 2) >> 2;
}

m[0] = ai;
m[1] = bi;
m[2] = ci;
m[3] = di;

return k;
}

#endif

#if FP_SMB == JMPDS || !defined(STRIP)

static dis_t jumpdivstep(dis_t m[4], dig_t *k, dis_t delta, dis_t y, dis_t x,
Expand Down Expand Up @@ -147,198 +270,91 @@ int fp_smb_basic(const fp_t a) {

#if FP_SMB == BINAR || !defined(STRIP)

static inline dig_t is_zero(dig_t l) {
l = ~l & (l - 1);
return (l >> (RLC_DIG - 1));
}

static dig_t lshift_2(dig_t hi, dig_t lo, size_t l) {
size_t r = RLC_DIG - l;
dig_t mask = 0 - (is_zero(l) ^ 1);
return (hi << (l & (RLC_DIG - 1))) | ((lo & mask) >> (r & (RLC_DIG - 1)));
}

static void ab_approximation_n(dig_t a_[2], const dig_t a[],
dig_t b_[2], const dig_t b[]) {
dig_t a_hi, a_lo, b_hi, b_lo, mask;
size_t i;

i = RLC_FP_DIGS - 1;
a_hi = a[i], a_lo = a[i - 1];
b_hi = b[i], b_lo = b[i - 1];
for (int j = i - 1; j >= 0; j--) {
mask = 0 - is_zero(a_hi | b_hi);
a_hi = ((a_lo ^ a_hi) & mask) ^ a_hi;
b_hi = ((b_lo ^ b_hi) & mask) ^ b_hi;
a_lo = ((a[j] ^ a_lo) & mask) ^ a_lo;
b_lo = ((b[j] ^ b_lo) & mask) ^ b_lo;
}
i = RLC_DIG - util_bits_dig(a_hi | b_hi);
/* |i| can be RLC_DIG if all a[2..]|b[2..] were zeros */

a_[0] = a[0], a_[1] = lshift_2(a_hi, a_lo, i);
b_[0] = b[0], b_[1] = lshift_2(b_hi, b_lo, i);
}

static dig_t smul_n_shift_n(dig_t ret[], const dig_t a[], dig_t *f_,
const dig_t b[], dig_t *g_) {
dig_t a_[RLC_FP_DIGS + 1], b_[RLC_FP_DIGS + 1], f, g, neg, carry, hi;
size_t i;

/* |a|*|f_| */
f = *f_;
neg = 0 - RLC_SIGN(f);
f = (f ^ neg) - neg; /* ensure |f| is positive */
bn_negs_low(a_, a, -neg, RLC_FP_DIGS);
hi = fp_mul1_low(a_, a_, f);
a_[RLC_FP_DIGS] = hi - (f & neg);

/* |b|*|g_| */
g = *g_;
neg = 0 - RLC_SIGN(g);
g = (g ^ neg) - neg; /* ensure |g| is positive */
bn_negs_low(b_, b, -neg, RLC_FP_DIGS);
hi = fp_mul1_low(b_, b_, g);
b_[RLC_FP_DIGS] = hi - (g & neg);

/* |a|*|f_| + |b|*|g_| */
(void)bn_addn_low(a_, a_, b_, RLC_FP_DIGS + 1);

/* (|a|*|f_| + |b|*|g_|) >> k */
for (carry = a_[0], i = 0; i < RLC_FP_DIGS; i++) {
hi = carry >> (RLC_DIG - 2);
carry = a_[i + 1];
ret[i] = hi | (carry << 2);
}

/* ensure result is non-negative, fix up |f_| and |g_| accordingly */
neg = 0 - RLC_SIGN(carry);
*f_ = (*f_ ^ neg) - neg;
*g_ = (*g_ ^ neg) - neg;
bn_negs_low(ret, ret, -neg, RLC_FP_DIGS);

return neg;
}

/*
* Copy of inner_loop_n above, but with |L| updates.
*/
static dig_t legendre_loop_n(dig_t l, dig_t m[4], const dig_t a_[2],
const dig_t b_[2], size_t n) {
dig_t limbx, f0 = 1, g0 = 0, f1 = 0, g1 = 1;
dig_t a_lo, a_hi, b_lo, b_hi, t_lo, t_hi, odd, borrow, xorm;

a_lo = a_[0], a_hi = a_[1];
b_lo = b_[0], b_hi = b_[1];

while (n--) {
odd = 0 - (a_lo & 1);

/* a_ -= b_ if a_ is odd */
t_lo = a_lo, t_hi = a_hi;

borrow = 0;
limbx = a_lo - (b_lo & odd);
borrow = (a_lo < limbx);
a_lo = limbx;

limbx = a_hi - (b_hi & odd);
xorm = limbx - borrow;
borrow = -((a_hi < limbx) || (borrow && !limbx));
a_hi = xorm;

l += ((t_lo & b_lo) >> 1) & borrow;

/* negate a_-b_ if it borrowed */
a_lo ^= borrow;
a_hi ^= borrow;
limbx = a_lo + (borrow & 1);
a_hi += (a_lo < limbx);
a_lo = limbx;

/* b_=a_ if a_-b_ borrowed */
b_lo = ((t_lo ^ b_lo) & borrow) ^ b_lo;
b_hi = ((t_hi ^ b_hi) & borrow) ^ b_hi;

/* exchange f0 and f1 if a_-b_ borrowed */
xorm = (f0 ^ f1) & borrow;
f0 ^= xorm;
f1 ^= xorm;

/* exchange g0 and g1 if a_-b_ borrowed */
xorm = (g0 ^ g1) & borrow;
g0 ^= xorm;
g1 ^= xorm;

/* subtract if a_ was odd */
f0 -= f1 & odd;
g0 -= g1 & odd;

f1 <<= 1;
g1 <<= 1;
a_lo >>= 1;
a_lo |= a_hi << (RLC_DIG - 1);
a_hi >>= 1;

l += (b_lo + 2) >> 2;
}

m[0] = f0;
m[1] = g0;
m[2] = f1;
m[3] = g1;

return l;
}
#define RLC_LSH(H, L, I) \
(H << I) | (L & -(I != 0)) >> ((RLC_DIG - I) & (RLC_DIG - 1))

int fp_smb_binar(const fp_t a) {
const size_t s = RLC_DIG - 2;
dv_t x, y, t;
dig_t a_[2], b_[2], neg, l = 0, m[4];
bn_t _t;
dv_t f, g, t, t0, t1;
dig_t g_[2], f_[2], neg, l, g_hi, g_lo, f_hi, f_lo, mask, k = 0;
dis_t m[4];
int iterations = 2 * RLC_FP_DIGS * RLC_DIG;

if (fp_is_zero(a)) {
return 0;
}

bn_null(_t);
dv_null(x);
dv_null(y);
dv_null(f);
dv_null(g);
dv_null(t);
dv_null(t0);
dv_null(t1);

RLC_TRY {
bn_new(_t);
dv_new(x);
dv_new(y);
dv_new(f);
dv_new(g);
dv_new(t);
dv_new(t0);
dv_new(t1);

dv_zero(t, 2 * RLC_FP_DIGS);
dv_copy(f, fp_prime_get(), RLC_FP_DIGS);
#if FP_RDC == MONTY
/* Convert a from Montgomery form. */
fp_copy(t, a);
fp_rdcn_low(g, t);
#else
fp_copy(g, a);
#endif

fp_prime_back(_t, a);
dv_zero(x, RLC_FP_DIGS);
dv_copy(x, _t->dp, _t->used);
dv_copy(y, fp_prime_get(), RLC_FP_DIGS);

for (size_t i = 0; i < iterations / s; i++) {
ab_approximation_n(a_, x, b_, y);
l = legendre_loop_n(l, m, a_, b_, s);
neg = smul_n_shift_n(t, x, &m[0], y, &m[1]);
(void)smul_n_shift_n(y, x, &m[2], y, &m[3]);
fp_copy(x, t);
l += (y[0] >> 1) & neg;
for (size_t len, i = 0; i < iterations / s; i++) {
f_hi = f[RLC_FP_DIGS - 1], f_lo = f[RLC_FP_DIGS - 2];
g_hi = g[RLC_FP_DIGS - 1], g_lo = g[RLC_FP_DIGS - 2];
for (int j = RLC_FP_DIGS - 2; j >= 0; j--) {
l = (f_hi | g_hi);
l = ~l & (l - 1);
mask = -(l >> (RLC_DIG - 1));
f_hi = ((f_lo ^ f_hi) & mask) ^ f_hi;
g_hi = ((g_lo ^ g_hi) & mask) ^ g_hi;
f_lo = ((f[j] ^ f_lo) & mask) ^ f_lo;
g_lo = ((g[j] ^ g_lo) & mask) ^ g_lo;
}
len = RLC_DIG - util_bits_dig(f_hi | g_hi);
f_[0] = f[0], f_[1] = RLC_LSH(f_hi, f_lo, len);
g_[0] = g[0], g_[1] = RLC_LSH(g_hi, g_lo, len);

k = porninstep(m, f_, g_, k, s);

t0[RLC_FP_DIGS] = bn_muls_low(t0, g, RLC_POS, m[0], RLC_FP_DIGS);
t1[RLC_FP_DIGS] = bn_muls_low(t1, f, RLC_POS, m[1], RLC_FP_DIGS);
bn_addn_low(t0, t0, t1, RLC_FP_DIGS + 1);
neg = RLC_SIGN(t0[RLC_FP_DIGS]);
bn_rshb_low(t, t0, RLC_FP_DIGS + 1, (RLC_DIG - 2));
bn_negs_low(t, t, neg, RLC_FP_DIGS);

t0[RLC_FP_DIGS] = bn_muls_low(t0, g, RLC_POS, m[2], RLC_FP_DIGS);
t1[RLC_FP_DIGS] = bn_muls_low(t1, f, RLC_POS, m[3], RLC_FP_DIGS);
bn_addn_low(t1, t1, t0, RLC_FP_DIGS + 1);
bn_rshb_low(f, t1, RLC_FP_DIGS + 1, (RLC_DIG - 2));
bn_negs_low(f, f, RLC_SIGN(t1[RLC_FP_DIGS]), RLC_FP_DIGS);

fp_copy(g, t);
k += (f[0] >> 1) & neg;
}

l = legendre_loop_n(l, m, x, y, iterations % s);
k = porninstep(m, g, f, k, iterations % s);

} RLC_CATCH_ANY {
RLC_THROW(ERR_CAUGHT)
} RLC_FINALLY {
bn_free(_t);
dv_free(x);
dv_free(y);
dv_free(f);
dv_free(g);
dv_free(t);
dv_free(t0);
dv_free(t1);
}

return (l & 1 ? -1 : 1);
return (k & 1 ? -1 : 1);
}

#endif
Expand Down

0 comments on commit 43ad9ef

Please sign in to comment.