Skip to content

Commit e34ceb3

Browse files
committed
Merge bitcoin#557: Eliminate scratch memory used when generating contexts
b3bf5f9 ecmult_impl: expand comment to explain how effective affine interacts with everything (Andrew Poelstra)
efa783f Store z-ratios in the 'x' coord they'll recover (Peter Dettman)
ffd3b34 add `secp256k1_ge_set_all_gej_var` test which deals with many infinite points (Andrew Poelstra)
84740ac ecmult_impl: save one fe_inv_var (Andrew Poelstra)
4704527 ecmult_impl: eliminate scratch memory used when generating context (Andrew Poelstra)
7f7a2ed ecmult_gen_impl: eliminate scratch memory used when generating context (Andrew Poelstra)

Pull request description:
Builds on bitcoin#553

Tree-SHA512: 6031a601a4a476c1d21fc8db219383e7930434d2f199543c61aca0118412322dd814a0109c385ff1f83d16897170dd0c25051697b0f88f15234b0059b661af41
2 parents 314a61d + b3bf5f9 commit e34ceb3

File tree

6 files changed

+174
-59
lines changed

6 files changed

+174
-59
lines changed

src/bench_ecmult.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ int main(int argc, char **argv) {
172172
secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]);
173173
}
174174
}
175-
secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS, &data.ctx->error_callback);
175+
secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS);
176176
free(pubkeys_gej);
177177

178178
for (i = 1; i <= 8; ++i) {

src/ecmult_gen_impl.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
7777
secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej, NULL);
7878
}
7979
}
80-
secp256k1_ge_set_all_gej_var(prec, precj, 1024, cb);
80+
secp256k1_ge_set_all_gej_var(prec, precj, 1024);
8181
}
8282
for (j = 0; j < 64; j++) {
8383
for (i = 0; i < 16; i++) {

src/ecmult_impl.h

+127-16
Original file line numberDiff line numberDiff line change
@@ -137,24 +137,135 @@ static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *p
137137
secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr);
138138
}
139139

140-
static void secp256k1_ecmult_odd_multiples_table_storage_var(int n, secp256k1_ge_storage *pre, const secp256k1_gej *a, const secp256k1_callback *cb) {
141-
secp256k1_gej *prej = (secp256k1_gej*)checked_malloc(cb, sizeof(secp256k1_gej) * n);
142-
secp256k1_ge *prea = (secp256k1_ge*)checked_malloc(cb, sizeof(secp256k1_ge) * n);
143-
secp256k1_fe *zr = (secp256k1_fe*)checked_malloc(cb, sizeof(secp256k1_fe) * n);
140+
static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp256k1_ge_storage *pre, const secp256k1_gej *a) {
141+
secp256k1_gej d;
142+
secp256k1_ge d_ge, p_ge;
143+
secp256k1_gej pj;
144+
secp256k1_fe zi;
145+
secp256k1_fe zr;
146+
secp256k1_fe dx_over_dz_squared;
144147
int i;
145148

146-
/* Compute the odd multiples in Jacobian form. */
147-
secp256k1_ecmult_odd_multiples_table(n, prej, zr, a);
148-
/* Convert them in batch to affine coordinates. */
149-
secp256k1_ge_set_table_gej_var(prea, prej, zr, n);
150-
/* Convert them to compact storage form. */
151-
for (i = 0; i < n; i++) {
152-
secp256k1_ge_to_storage(&pre[i], &prea[i]);
149+
VERIFY_CHECK(!a->infinity);
150+
151+
secp256k1_gej_double_var(&d, a, NULL);
152+
153+
/* First, we perform all the additions in an isomorphic curve obtained by multiplying
154+
* all `z` coordinates by 1/`d.z`. In these coordinates `d` is affine so we can use
155+
* `secp256k1_gej_add_ge_var` to perform the additions. For each addition, we store
156+
* the resulting y-coordinate and the z-ratio, since we only have enough memory to
157+
* store two field elements. These are sufficient to efficiently undo the isomorphism
158+
* and recompute all the `x`s.
159+
*/
160+
d_ge.x = d.x;
161+
d_ge.y = d.y;
162+
d_ge.infinity = 0;
163+
164+
secp256k1_ge_set_gej_zinv(&p_ge, a, &d.z);
165+
pj.x = p_ge.x;
166+
pj.y = p_ge.y;
167+
pj.z = a->z;
168+
pj.infinity = 0;
169+
170+
for (i = 0; i < (n - 1); i++) {
171+
secp256k1_fe_normalize_var(&pj.y);
172+
secp256k1_fe_to_storage(&pre[i].y, &pj.y);
173+
secp256k1_gej_add_ge_var(&pj, &pj, &d_ge, &zr);
174+
secp256k1_fe_normalize_var(&zr);
175+
secp256k1_fe_to_storage(&pre[i].x, &zr);
153176
}
154177

155-
free(prea);
156-
free(prej);
157-
free(zr);
178+
/* Invert d.z in the same batch, preserving pj.z so we can extract 1/d.z */
179+
secp256k1_fe_mul(&zi, &pj.z, &d.z);
180+
secp256k1_fe_inv_var(&zi, &zi);
181+
182+
/* Directly set `pre[n - 1]` to `pj`, saving the inverted z-coordinate so
183+
* that we can combine it with the saved z-ratios to compute the other zs
184+
* without any more inversions. */
185+
secp256k1_ge_set_gej_zinv(&p_ge, &pj, &zi);
186+
secp256k1_ge_to_storage(&pre[n - 1], &p_ge);
187+
188+
/* Compute the actual x-coordinate of D, which will be needed below. */
189+
secp256k1_fe_mul(&d.z, &zi, &pj.z); /* d.z = 1/d.z */
190+
secp256k1_fe_sqr(&dx_over_dz_squared, &d.z);
191+
secp256k1_fe_mul(&dx_over_dz_squared, &dx_over_dz_squared, &d.x);
192+
193+
/* Going into the second loop, we have set `pre[n-1]` to its final affine
194+
* form, but still need to set `pre[i]` for `i` in 0 through `n-2`. We
195+
* have `zi = (p.z * d.z)^-1`, where
196+
*
197+
* `p.z` is the z-coordinate of the point on the isomorphic curve
198+
* which was ultimately assigned to `pre[n-1]`.
199+
* `d.z` is the multiplier that must be applied to all z-coordinates
200+
* to move from our isomorphic curve back to secp256k1; so the
201+
* product `p.z * d.z` is the z-coordinate of the secp256k1
202+
* point assigned to `pre[n-1]`.
203+
*
204+
* All subsequent inverse-z-coordinates can be obtained by multiplying this
205+
* factor by successive z-ratios, which is much more efficient than directly
206+
* computing each one.
207+
*
208+
* Importantly, these inverse-zs will be coordinates of points on secp256k1,
209+
* while our other stored values come from computations on the isomorphic
210+
* curve. So in the below loop, we will take care not to actually use `zi`
211+
* or any derived values until we're back on secp256k1.
212+
*/
213+
i = n - 1;
214+
while (i > 0) {
215+
secp256k1_fe zi2, zi3;
216+
const secp256k1_fe *rzr;
217+
i--;
218+
219+
secp256k1_ge_from_storage(&p_ge, &pre[i]);
220+
221+
/* For each remaining point, we extract the z-ratio from the stored
222+
* x-coordinate, compute its z^-1 from that, and compute the full
223+
* point from that. */
224+
rzr = &p_ge.x;
225+
secp256k1_fe_mul(&zi, &zi, rzr);
226+
secp256k1_fe_sqr(&zi2, &zi);
227+
secp256k1_fe_mul(&zi3, &zi2, &zi);
228+
/* To compute the actual x-coordinate, we use the stored z ratio and
229+
* y-coordinate, which we obtained from `secp256k1_gej_add_ge_var`
230+
* in the loop above, as well as the inverse of the square of its
231+
* z-coordinate. We store the latter in the `zi2` variable, which is
232+
* computed iteratively starting from the overall Z inverse then
233+
* multiplying by each z-ratio in turn.
234+
*
235+
* Denoting the z-ratio as `rzr`, we observe that it is equal to `h`
236+
* from the inside of the above `gej_add_ge_var` call. This satisfies
237+
*
238+
* rzr = d_x * z^2 - x * d_z^2
239+
*
240+
* where (`d_x`, `d_z`) are Jacobian coordinates of `D` and `(x, z)`
241+
* are Jacobian coordinates of our desired point -- except both are on
242+
* the isomorphic curve that we were using when we called `gej_add_ge_var`.
243+
* To get back to secp256k1, we must multiply both `z`s by `d_z`, or
244+
* equivalently divide both `x`s by `d_z^2`. Our equation then becomes
245+
*
246+
* rzr = d_x * z^2 / d_z^2 - x
247+
*
248+
* (The left-hand-side, being a ratio of z-coordinates, is unaffected
249+
* by the isomorphism.)
250+
*
251+
* Rearranging to solve for `x`, we have
252+
*
253+
* x = d_x * z^2 / d_z^2 - rzr
254+
*
255+
* But what we actually want is the affine coordinate `X = x/z^2`,
256+
* which will satisfy
257+
*
258+
* X = d_x / d_z^2 - rzr / z^2
259+
* = dx_over_dz_squared - rzr * zi2
260+
*/
261+
secp256k1_fe_mul(&p_ge.x, rzr, &zi2);
262+
secp256k1_fe_negate(&p_ge.x, &p_ge.x, 1);
263+
secp256k1_fe_add(&p_ge.x, &dx_over_dz_squared);
264+
/* y is stored_y/z^3, as we expect */
265+
secp256k1_fe_mul(&p_ge.y, &p_ge.y, &zi3);
266+
/* Store */
267+
secp256k1_ge_to_storage(&pre[i], &p_ge);
268+
}
158269
}
159270

160271
/** The following two macros retrieve a particular odd multiple from a table
@@ -202,7 +313,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const
202313
ctx->pre_g = (secp256k1_ge_storage (*)[])checked_malloc(cb, sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));
203314

204315
/* precompute the tables with odd multiples */
205-
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj, cb);
316+
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj);
206317

207318
#ifdef USE_ENDOMORPHISM
208319
{
@@ -216,7 +327,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const
216327
for (i = 0; i < 128; i++) {
217328
secp256k1_gej_double_var(&g_128j, &g_128j, NULL);
218329
}
219-
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j, cb);
330+
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j);
220331
}
221332
#endif
222333
}

src/group.h

+1-6
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,7 @@ static void secp256k1_ge_neg(secp256k1_ge *r, const secp256k1_ge *a);
6565
static void secp256k1_ge_set_gej(secp256k1_ge *r, secp256k1_gej *a);
6666

6767
/** Set a batch of group elements equal to the inputs given in jacobian coordinates */
68-
static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len, const secp256k1_callback *cb);
69-
70-
/** Set a batch of group elements equal to the inputs given in jacobian
71-
* coordinates (with known z-ratios). zr must contain the known z-ratios such
72-
* that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. */
73-
static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len);
68+
static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len);
7469

7570
/** Bring a batch of inputs given in jacobian coordinates (with known z-ratios) to
7671
* the same global z "denominator". zr must contain the known z-ratios such

src/group_impl.h

+27-30
Original file line numberDiff line numberDiff line change
@@ -126,46 +126,43 @@ static void secp256k1_ge_set_gej_var(secp256k1_ge *r, secp256k1_gej *a) {
126126
r->y = a->y;
127127
}
128128

129-
static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len, const secp256k1_callback *cb) {
130-
secp256k1_fe *az;
131-
secp256k1_fe *azi;
129+
static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len) {
130+
secp256k1_fe u;
132131
size_t i;
133-
size_t count = 0;
134-
az = (secp256k1_fe *)checked_malloc(cb, sizeof(secp256k1_fe) * len);
132+
size_t last_i = SIZE_MAX;
133+
135134
for (i = 0; i < len; i++) {
136135
if (!a[i].infinity) {
137-
az[count++] = a[i].z;
136+
/* Use destination's x coordinates as scratch space */
137+
if (last_i == SIZE_MAX) {
138+
r[i].x = a[i].z;
139+
} else {
140+
secp256k1_fe_mul(&r[i].x, &r[last_i].x, &a[i].z);
141+
}
142+
last_i = i;
138143
}
139144
}
145+
if (last_i == SIZE_MAX) {
146+
return;
147+
}
148+
secp256k1_fe_inv_var(&u, &r[last_i].x);
140149

141-
azi = (secp256k1_fe *)checked_malloc(cb, sizeof(secp256k1_fe) * count);
142-
secp256k1_fe_inv_all_var(azi, az, count);
143-
free(az);
144-
145-
count = 0;
146-
for (i = 0; i < len; i++) {
147-
r[i].infinity = a[i].infinity;
150+
i = last_i;
151+
while (i > 0) {
152+
i--;
148153
if (!a[i].infinity) {
149-
secp256k1_ge_set_gej_zinv(&r[i], &a[i], &azi[count++]);
154+
secp256k1_fe_mul(&r[last_i].x, &r[i].x, &u);
155+
secp256k1_fe_mul(&u, &u, &a[last_i].z);
156+
last_i = i;
150157
}
151158
}
152-
free(azi);
153-
}
159+
VERIFY_CHECK(!a[last_i].infinity);
160+
r[last_i].x = u;
154161

155-
static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len) {
156-
size_t i = len - 1;
157-
secp256k1_fe zi;
158-
159-
if (len > 0) {
160-
/* Compute the inverse of the last z coordinate, and use it to compute the last affine output. */
161-
secp256k1_fe_inv(&zi, &a[i].z);
162-
secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi);
163-
164-
/* Work out way backwards, using the z-ratios to scale the x/y values. */
165-
while (i > 0) {
166-
secp256k1_fe_mul(&zi, &zi, &zr[i]);
167-
i--;
168-
secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi);
162+
for (i = 0; i < len; i++) {
163+
r[i].infinity = a[i].infinity;
164+
if (!a[i].infinity) {
165+
secp256k1_ge_set_gej_zinv(&r[i], &a[i], &r[i].x);
169166
}
170167
}
171168
}

src/tests.c

+17-5
Original file line numberDiff line numberDiff line change
@@ -2095,28 +2095,40 @@ void test_ge(void) {
20952095
/* Test batch gej -> ge conversion with and without known z ratios. */
20962096
{
20972097
secp256k1_fe *zr = (secp256k1_fe *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_fe));
2098-
secp256k1_ge *ge_set_table = (secp256k1_ge *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge));
20992098
secp256k1_ge *ge_set_all = (secp256k1_ge *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge));
21002099
for (i = 0; i < 4 * runs + 1; i++) {
21012100
/* Compute gej[i + 1].z / gej[i].z (with gej[n].z taken to be 1). */
21022101
if (i < 4 * runs) {
21032102
secp256k1_fe_mul(&zr[i + 1], &zinv[i], &gej[i + 1].z);
21042103
}
21052104
}
2106-
secp256k1_ge_set_table_gej_var(ge_set_table, gej, zr, 4 * runs + 1);
2107-
secp256k1_ge_set_all_gej_var(ge_set_all, gej, 4 * runs + 1, &ctx->error_callback);
2105+
secp256k1_ge_set_all_gej_var(ge_set_all, gej, 4 * runs + 1);
21082106
for (i = 0; i < 4 * runs + 1; i++) {
21092107
secp256k1_fe s;
21102108
random_fe_non_zero(&s);
21112109
secp256k1_gej_rescale(&gej[i], &s);
2112-
ge_equals_gej(&ge_set_table[i], &gej[i]);
21132110
ge_equals_gej(&ge_set_all[i], &gej[i]);
21142111
}
2115-
free(ge_set_table);
21162112
free(ge_set_all);
21172113
free(zr);
21182114
}
21192115

2116+
/* Test batch gej -> ge conversion with many infinities. */
2117+
for (i = 0; i < 4 * runs + 1; i++) {
2118+
random_group_element_test(&ge[i]);
2119+
/* randomly set half the points to infinity */
2120+
if(secp256k1_fe_is_odd(&ge[i].x)) {
2121+
secp256k1_ge_set_infinity(&ge[i]);
2122+
}
2123+
secp256k1_gej_set_ge(&gej[i], &ge[i]);
2124+
}
2125+
/* batch invert */
2126+
secp256k1_ge_set_all_gej_var(ge, gej, 4 * runs + 1);
2127+
/* check result */
2128+
for (i = 0; i < 4 * runs + 1; i++) {
2129+
ge_equals_gej(&ge[i], &gej[i]);
2130+
}
2131+
21202132
free(ge);
21212133
free(gej);
21222134
free(zinv);

0 commit comments

Comments
 (0)