Use Co-Z arithmetic for precomputations

- Selected Co-Z formulas from "Scalar Multiplication on Weierstraß Elliptic Curves from Co-Z Arithmetic" (Goundar, Joye, et al.) added as group methods with new type secp256k1_coz_t. - Co-Z methods used for A and G point precomputations. - WINDOW_A size increased to 6 since the precomputation is much faster per point. - DBLU cost: 3M+4S, ZADDU cost: 5M+2S. - From 2.4% to 3.8% faster 'bench' results, depending on configuration. - Startup in _ecmult_start() around 50% faster.
bitcoin-core · Aug 20, 2014 · be150ae · be150ae
1 parent 8a0ee23
commit be150ae
Show file tree

Hide file tree

Showing 3 changed files with 97 additions and 20 deletions.
diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
@@ -11,12 +11,15 @@
 #include "ecmult.h"
 
 // optimal for 128-bit and 256-bit exponents.
-#define WINDOW_A 5
+#define WINDOW_A 6
 
 // larger numbers may result in slightly better performance, at the cost of
 // exponentially larger precomputed tables. WINDOW_G == 14 results in 640 KiB.
 #define WINDOW_G 14
 
+/** The number of entries a table with precomputed multiples needs to have. */
+#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
+
 /** Fill a table 'pre' with precomputed odd multiples of a. W determines the size of the table.
  *  pre will contains the values [1*a,3*a,5*a,...,(2^(w-1)-1)*a], so it needs place for
  *  2^(w-2) entries.
@@ -29,27 +32,18 @@
  *  To compute a*P + b*G, we use the jacobian version for P, and the affine version for G, as
  *  G is constant, so it only needs to be done once in advance.
  */
-void static secp256k1_ecmult_table_precomp_gej(secp256k1_gej_t *pre, const secp256k1_gej_t *a, int w) {
-    pre[0] = *a;
-    secp256k1_gej_t d; secp256k1_gej_double(&d, &pre[0]);
-    for (int i=1; i<(1 << (w-2)); i++)
-        secp256k1_gej_add(&pre[i], &d, &pre[i-1]);
+void static secp256k1_ecmult_table_precomp_gej(secp256k1_gej_t *prej, const secp256k1_gej_t *a, int w) { // writes ECMULT_TABLE_SIZE(w) entries into prej
+    secp256k1_coz_t d; secp256k1_coz_dblu(&d, &prej[0], a); // d = 2*a; prej[0] = a, re-expressed so that d is co-z with it
+    for (int i=1; i<ECMULT_TABLE_SIZE(w); i++)
+        secp256k1_coz_zaddu(&prej[i], &d, &prej[i-1]); // prej[i] = d + prej[i-1]; d is updated to be co-z with prej[i] for the next step
 }
 
 void static secp256k1_ecmult_table_precomp_ge(secp256k1_ge_t *pre, const secp256k1_gej_t *a, int w) {
-    const int table_size = 1 << (w-2);
-    secp256k1_gej_t prej[table_size];
-    prej[0] = *a;
-    secp256k1_gej_t d; secp256k1_gej_double(&d, a);
-    for (int i=1; i<table_size; i++) {
-        secp256k1_gej_add(&prej[i], &d, &prej[i-1]);
-    }
-    secp256k1_ge_set_all_gej(table_size, pre, prej);
+    secp256k1_gej_t prej[ECMULT_TABLE_SIZE(w)]; // variable-length stack array: its size depends on the runtime parameter w
+    secp256k1_ecmult_table_precomp_gej(prej, a, w); // build the table in Jacobian form first
+    secp256k1_ge_set_all_gej(ECMULT_TABLE_SIZE(w), pre, prej); // presumably converts all prej entries into pre in one pass -- confirm against secp256k1_ge_set_all_gej
 }
 
-/** The number of entries a table with precomputed multiples needs to have. */
-#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
-
 /** The following two macro retrieves a particular odd multiple from a table
  *  of precomputed multiples. */
 #define ECMULT_TABLE_GET(r,pre,n,w,neg) do { \
@@ -111,9 +105,10 @@ static void secp256k1_ecmult_start(void) {
     secp256k1_gej_t fn; secp256k1_gej_set_infinity(&fn);
     for (int j=0; j<64; j++) {
         secp256k1_gej_add(&fn, &fn, &gj);
-        secp256k1_gej_t adj = gj;
-        for (int i=1; i<16; i++) {
-            secp256k1_gej_add(&gj, &gj, &adj);
+        secp256k1_coz_t adj; secp256k1_coz_dblu_a(&gj, &adj, &gj);
+        tj[pos++] = gj; 
+        for (int i=2; i<16; i++) {
+            secp256k1_coz_zaddu(&gj, &adj, &gj);
             tj[pos++] = gj;
         }
     }

diff --git a/src/group.h b/src/group.h
@@ -23,6 +23,14 @@ typedef struct {
     int infinity; // whether this represents the point at infinity
 } secp256k1_gej_t;
 
+/** A group element of the secp256k1 curve, with an implicit z coordinate (and infinity flag).
+ *  An instance of secp256k1_coz_t is always "co-z" with some instance of secp256k1_gej_t, from
+ *  which it inherits its implied z coordinate and infinity flag. */
+typedef struct { // holds only x and y; z and the infinity flag live in the companion secp256k1_gej_t
+    secp256k1_fe_t x; // actual X: x/z^2 (z implied, i.e. taken from the co-z secp256k1_gej_t)
+    secp256k1_fe_t y; // actual Y: y/z^3 (z implied, i.e. taken from the co-z secp256k1_gej_t)
+} secp256k1_coz_t;
+
 /** Global constants related to the group */
 typedef struct {
     secp256k1_num_t order; // the order of the curve (= order of its generator)
@@ -112,4 +120,13 @@ void static secp256k1_gej_mul_lambda(secp256k1_gej_t *r, const secp256k1_gej_t *
 void static secp256k1_gej_split_exp(secp256k1_num_t *r1, secp256k1_num_t *r2, const secp256k1_num_t *a);
 #endif
 
+/** Set r equal to the double of a, and ra equal to a, such that r is co-z with ra. */
+void static secp256k1_coz_dblu(secp256k1_coz_t *r, secp256k1_gej_t *ra, const secp256k1_gej_t *a);
+
+/** Set r equal to the double of a, and ra equal to a, such that ra is co-z with r. */
+void static secp256k1_coz_dblu_a(secp256k1_gej_t *r, secp256k1_coz_t *ra, const secp256k1_gej_t *a);
+
+/** Set r equal to the sum of ra and b. ra is initially co-z with b and finally co-z with r. */
+void static secp256k1_coz_zaddu(secp256k1_gej_t *r, secp256k1_coz_t *ra, const secp256k1_gej_t *b);
+
 #endif
diff --git a/src/group_impl.h b/src/group_impl.h
@@ -339,6 +339,71 @@ void static secp256k1_gej_split_exp(secp256k1_num_t *r1, secp256k1_num_t *r2, co
 }
 #endif
 
+void static secp256k1_coz_dblu(secp256k1_coz_t *r, secp256k1_gej_t *ra, const secp256k1_gej_t *a) { // r = 2*a (x,y only); ra = a rewritten with the z that r shares
+    if ((ra->infinity = a->infinity)) // assignment is intentional: copy the flag, then test it
+        return; // a is infinity: only ra->infinity is set, r is left untouched
+    secp256k1_fe_t B; secp256k1_fe_sqr(&B, &a->x); // B = (a->x)^2
+    secp256k1_fe_t E; secp256k1_fe_sqr(&E, &a->y); // E = (a->y)^2
+    secp256k1_fe_t L; secp256k1_fe_sqr(&L, &E); // L = E^2 = (a->y)^4
+    secp256k1_fe_t M = B; secp256k1_fe_mul_int(&M, 3); // M = 3*B
+    secp256k1_fe_t *S = &ra->x; secp256k1_fe_mul(S, &a->x, &E); secp256k1_fe_mul_int(S, 4); // S = 4*(a->x)*E, stored directly as ra->x
+    secp256k1_fe_normalize(S); // normalized because S is negated with magnitude argument 1 just below
+    secp256k1_fe_mul(&ra->z, &a->z, &a->y); secp256k1_fe_mul_int(&ra->z, 2); // shared z = 2*(a->y)*(a->z), kept in ra
+    secp256k1_fe_t t; secp256k1_fe_negate(&t, S, 1); secp256k1_fe_mul_int(&t, 2); // t = -2*S
+    secp256k1_fe_sqr(&r->x, &M); secp256k1_fe_add(&r->x, &t); // r->x = M^2 - 2*S
+    secp256k1_fe_negate(&t, &r->x, 5); secp256k1_fe_add(&t, S); // t = S - r->x
+    secp256k1_fe_mul(&r->y, &M, &t); // r->y = M*(S - r->x), second term subtracted below
+    ra->y = L; secp256k1_fe_mul_int(&ra->y, 8); secp256k1_fe_normalize(&ra->y); // ra->y = 8*L; normalized before negation with magnitude argument 1
+    secp256k1_fe_negate(&t, &ra->y, 1); secp256k1_fe_add(&r->y, &t); // r->y = M*(S - r->x) - 8*L
+}
+
+void static secp256k1_coz_dblu_a(secp256k1_gej_t *r, secp256k1_coz_t *ra, const secp256k1_gej_t *a) { // r = 2*a (with z); ra = a rewritten (x,y only) to share r's z
+    if ((r->infinity = a->infinity)) // assignment is intentional: copy the flag, then test it
+        return; // a is infinity: only r->infinity is set, ra is left untouched
+    secp256k1_fe_t B; secp256k1_fe_sqr(&B, &a->x); // B = (a->x)^2
+    secp256k1_fe_t E; secp256k1_fe_sqr(&E, &a->y); // E = (a->y)^2
+    secp256k1_fe_t L; secp256k1_fe_sqr(&L, &E); // L = E^2 = (a->y)^4
+    secp256k1_fe_t M = B; secp256k1_fe_mul_int(&M, 3); // M = 3*B
+    secp256k1_fe_t *S = &ra->x; secp256k1_fe_mul(S, &a->x, &E); secp256k1_fe_mul_int(S, 4); // S = 4*(a->x)*E, stored directly as ra->x
+    secp256k1_fe_normalize(S); // normalized because S is negated with magnitude argument 1 just below
+    secp256k1_fe_mul(&r->z, &a->z, &a->y); secp256k1_fe_mul_int(&r->z, 2); // r->z = 2*(a->y)*(a->z); a->x and a->y were already consumed into B, E, S
+    secp256k1_fe_t t; secp256k1_fe_negate(&t, S, 1); secp256k1_fe_mul_int(&t, 2); // t = -2*S
+    secp256k1_fe_sqr(&r->x, &M); secp256k1_fe_add(&r->x, &t); // r->x = M^2 - 2*S
+    secp256k1_fe_negate(&t, &r->x, 5); secp256k1_fe_add(&t, S); // t = S - r->x
+    secp256k1_fe_mul(&r->y, &M, &t); // r->y = M*(S - r->x), second term subtracted below
+    ra->y = L; secp256k1_fe_mul_int(&ra->y, 8); secp256k1_fe_normalize(&ra->y); // ra->y = 8*L; normalized before negation with magnitude argument 1
+    secp256k1_fe_negate(&t, &ra->y, 1); secp256k1_fe_add(&r->y, &t); // r->y = M*(S - r->x) - 8*L
+}
+
+void static secp256k1_coz_zaddu(secp256k1_gej_t *r, secp256k1_coz_t *ra, const secp256k1_gej_t *b) { // r = ra + b; ra is rewritten in place to share r's z
+    if ((r->infinity = b->infinity)) // assignment is intentional: copy the flag, then test it
+        return; // b is infinity: r is marked infinity and ra is left unchanged
+    secp256k1_fe_t u2 = b->x; secp256k1_fe_normalize(&u2); // normalized copy of b->x (copied before r is written)
+    secp256k1_fe_t s2 = b->y; secp256k1_fe_normalize(&s2); // normalized copy of b->y
+    secp256k1_fe_t dX; secp256k1_fe_negate(&dX, &u2, 1); secp256k1_fe_add(&dX, &ra->x); // dX = ra->x - b->x
+    secp256k1_fe_t dY; secp256k1_fe_negate(&dY, &s2, 1); secp256k1_fe_add(&dY, &ra->y); // dY = ra->y - b->y
+    secp256k1_fe_normalize(&dX); // normalize so the zero test below is meaningful
+    if (secp256k1_fe_is_zero(&dX)) { // same x under a shared z: the inputs are equal or opposite
+        secp256k1_fe_normalize(&dY);
+        if (secp256k1_fe_is_zero(&dY)) {
+            secp256k1_coz_dblu_a(r, ra, b); // same point: r = 2*b, ra = b made co-z with r
+        } else {
+            r->infinity = 1; // differing y with equal x: result is marked as infinity
+        }
+        return;
+    }
+    secp256k1_fe_t C; secp256k1_fe_sqr(&C, &dX); // C = dX^2
+    secp256k1_fe_t *W1 = &ra->x; secp256k1_fe_mul(W1, W1, &C); // W1 = ra->x * C, written back as the new ra->x
+    secp256k1_fe_t W2; secp256k1_fe_mul(&W2, &u2, &C); // W2 = b->x * C
+    secp256k1_fe_t D; secp256k1_fe_sqr(&D, &dY); // D = dY^2
+    secp256k1_fe_t A; secp256k1_fe_negate(&A, W1, 1); secp256k1_fe_add(&A, &W2); // A = W2 - W1
+    secp256k1_fe_mul(&A, &A, &ra->y); secp256k1_fe_negate(&ra->y, &A, 1); // A = ra->y*(W2 - W1); new ra->y = -A
+    r->x = *W1; secp256k1_fe_add(&r->x, &W2); secp256k1_fe_negate(&r->x, &r->x, 2); // r->x = -(W1 + W2)
+    secp256k1_fe_add(&r->x, &D); // r->x = D - W1 - W2
+    secp256k1_fe_negate(&r->y, &r->x, 4); secp256k1_fe_add(&r->y, W1); // r->y = W1 - r->x
+    secp256k1_fe_mul(&r->y, &r->y, &dY); secp256k1_fe_add(&r->y, &A); // r->y = dY*(W1 - r->x) + A
+    secp256k1_fe_mul(&r->z, &b->z, &dX); // r->z = b->z * dX; b->z is read last, so r may alias b
+}
 
 void static secp256k1_ge_start(void) {
     static const unsigned char secp256k1_ge_consts_order[] = {