Skip to content

Commit c77fc08

Browse files
committed
Merge bitcoin#486: Add pippenger_wnaf for multi-multiplication
d2f9c6b Use more precise pippenger bucket windows (Jonas Nick) 4c950bb Save some additions per window in _pippenger_wnaf (Peter Dettman) a58f543 Add flags for choosing algorithm in ecmult_multi benchmark (Jonas Nick) 36b22c9 Use scratch space dependent batching in ecmult_multi (Jonas Nick) 355a38f Add pippenger_wnaf ecmult_multi (Jonas Nick) bc65aa7 Add bench_ecmult (Pieter Wuille) dba5471 Add ecmult_multi tests (Andrew Poelstra) 8c1c831 Generalize Strauss to support multiple points (Pieter Wuille) 548de42 add resizeable scratch space API (Andrew Poelstra) Pull request description: This PR is based on bitcoin#473 and adds a variant of "Pippenger's algorithm" (see [Bernstein et al., Faster batch forgery identification](https://eprint.iacr.org/2012/549.pdf), page 15 and scipr-lab/libff#10) for point multi-multiplication that performs better with a large number of points than Strauss' algorithm. ![aggsig](https://user-images.githubusercontent.com/2582071/32731185-12c0f108-c881-11e7-83c7-c2432b5fadf5.png) Thanks to @sipa for providing `wnaf_fixed`, benchmarking, and the crucial suggestion to use affine addition. The PR also makes `ecmult_multi` decide which algorithm to use, based on the number of points and the available scratch space. For restricted scratch spaces this can be further optimized in the future (e.g. a 35kB scratch space allows batches of 11 points with strauss or 95 points with pippenger; choosing pippenger would be 5% faster). As soon as this PR has received some feedback I'll repeat the benchmarks to determine the optimal `pippenger_bucket_window` with the new benchmarking code in bitcoin#473. Tree-SHA512: 8e155107a00d35f412300275803f912b1d228b7adff578bc4754c5b29641100b51b9d37f989316b636f7144e6b199febe7de302a44f498bbfd8d463bdbe31a5c
2 parents 6ad5cdb + d2f9c6b commit c77fc08

16 files changed

+1601
-83
lines changed

Makefile.am

+7-1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ noinst_HEADERS += src/field_5x52_asm_impl.h
4242
noinst_HEADERS += src/java/org_bitcoin_NativeSecp256k1.h
4343
noinst_HEADERS += src/java/org_bitcoin_Secp256k1Context.h
4444
noinst_HEADERS += src/util.h
45+
noinst_HEADERS += src/scratch.h
46+
noinst_HEADERS += src/scratch_impl.h
4547
noinst_HEADERS += src/testrand.h
4648
noinst_HEADERS += src/testrand_impl.h
4749
noinst_HEADERS += src/hash.h
@@ -79,14 +81,17 @@ libsecp256k1_jni_la_CPPFLAGS = -DSECP256K1_BUILD $(JNI_INCLUDES)
7981

8082
noinst_PROGRAMS =
8183
if USE_BENCHMARK
82-
noinst_PROGRAMS += bench_verify bench_sign bench_internal
84+
noinst_PROGRAMS += bench_verify bench_sign bench_internal bench_ecmult
8385
bench_verify_SOURCES = src/bench_verify.c
8486
bench_verify_LDADD = libsecp256k1.la $(SECP_LIBS) $(SECP_TEST_LIBS) $(COMMON_LIB)
8587
bench_sign_SOURCES = src/bench_sign.c
8688
bench_sign_LDADD = libsecp256k1.la $(SECP_LIBS) $(SECP_TEST_LIBS) $(COMMON_LIB)
8789
bench_internal_SOURCES = src/bench_internal.c
8890
bench_internal_LDADD = $(SECP_LIBS) $(COMMON_LIB)
8991
bench_internal_CPPFLAGS = -DSECP256K1_BUILD $(SECP_INCLUDES)
92+
bench_ecmult_SOURCES = src/bench_ecmult.c
93+
bench_ecmult_LDADD = $(SECP_LIBS) $(COMMON_LIB)
94+
bench_ecmult_CPPFLAGS = -DSECP256K1_BUILD $(SECP_INCLUDES)
9095
endif
9196

9297
TESTS =
@@ -159,6 +164,7 @@ $(gen_context_BIN): $(gen_context_OBJECTS)
159164
$(libsecp256k1_la_OBJECTS): src/ecmult_static_context.h
160165
$(tests_OBJECTS): src/ecmult_static_context.h
161166
$(bench_internal_OBJECTS): src/ecmult_static_context.h
167+
$(bench_ecmult_OBJECTS): src/ecmult_static_context.h
162168

163169
src/ecmult_static_context.h: $(gen_context_BIN)
164170
./$(gen_context_BIN)

include/secp256k1.h

+35
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,19 @@ extern "C" {
4242
*/
4343
typedef struct secp256k1_context_struct secp256k1_context;
4444

45+
/** Opaque data structure that holds rewritable "scratch space"
46+
*
47+
* The purpose of this structure is to replace dynamic memory allocations,
48+
* because we target architectures where this may not be available. It is
49+
* essentially a resizable (within specified parameters) block of bytes,
50+
* which is initially created either by memory allocation or (TODO) as a pointer
51+
* into some fixed rewritable space.
52+
*
53+
* Unlike the context object, this cannot safely be shared between threads
54+
* without additional synchronization logic.
55+
*/
56+
typedef struct secp256k1_scratch_space_struct secp256k1_scratch_space;
57+
4558
/** Opaque data structure that holds a parsed and valid public key.
4659
*
4760
* The exact representation of data inside is implementation defined and not
@@ -243,6 +256,28 @@ SECP256K1_API void secp256k1_context_set_error_callback(
243256
const void* data
244257
) SECP256K1_ARG_NONNULL(1);
245258

259+
/** Create a secp256k1 scratch space object.
260+
*
261+
* Returns: a newly created scratch space.
262+
* Args: ctx: an existing context object (cannot be NULL)
263+
* In: init_size: initial amount of memory to allocate
264+
* max_size: maximum amount of memory to allocate
265+
*/
266+
SECP256K1_API SECP256K1_WARN_UNUSED_RESULT secp256k1_scratch_space* secp256k1_scratch_space_create(
267+
const secp256k1_context* ctx,
268+
size_t init_size,
269+
size_t max_size
270+
) SECP256K1_ARG_NONNULL(1);
271+
272+
/** Destroy a secp256k1 scratch space.
273+
*
274+
* The pointer may not be used afterwards.
275+
* Args: scratch: space to destroy
276+
*/
277+
SECP256K1_API void secp256k1_scratch_space_destroy(
278+
secp256k1_scratch_space* scratch
279+
);
280+
246281
/** Parse a variable-length public key into the pubkey object.
247282
*
248283
* Returns: 1 if the public key was fully valid.

src/bench.h

+16
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#define SECP256K1_BENCH_H
99

1010
#include <stdio.h>
11+
#include <string.h>
1112
#include <math.h>
1213
#include "sys/time.h"
1314

@@ -63,4 +64,19 @@ void run_benchmark(char *name, void (*benchmark)(void*), void (*setup)(void*), v
6364
printf("us\n");
6465
}
6566

67+
/** Report whether a benchmark name was selected on the command line.
 *
 *  In:  argc, argv: the argument vector exactly as received by main;
 *                   argv[0] (the program name) is never compared.
 *       flag:       the name to look for (only read, never modified).
 *  Returns: 1 if no arguments follow the program name (nothing selected
 *           means "everything is selected"), or if one of the arguments
 *           equals flag exactly; 0 otherwise.
 */
int have_flag(int argc, char** argv, const char *flag) {
    int i;

    /* argc <= 1 covers the usual "program name only" case as well as an
     * empty argument vector (argc == 0), for which stepping past argv[0]
     * would run beyond the end of the array. */
    if (argc <= 1) {
        return 1;
    }
    for (i = 1; i < argc; i++) {
        if (strcmp(argv[i], flag) == 0) {
            return 1;
        }
    }
    return 0;
}
81+
6682
#endif /* SECP256K1_BENCH_H */

src/bench_ecmult.c

+196
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
/**********************************************************************
2+
* Copyright (c) 2017 Pieter Wuille *
3+
* Distributed under the MIT software license, see the accompanying *
4+
* file COPYING or http://www.opensource.org/licenses/mit-license.php.*
5+
**********************************************************************/
6+
#include <stdio.h>
7+
8+
#include "include/secp256k1.h"
9+
10+
#include "util.h"
11+
#include "hash_impl.h"
12+
#include "num_impl.h"
13+
#include "field_impl.h"
14+
#include "group_impl.h"
15+
#include "scalar_impl.h"
16+
#include "ecmult_impl.h"
17+
#include "bench.h"
18+
#include "secp256k1.c"
19+
20+
#define POINTS 32768
21+
#define ITERS 10000
22+
23+
/* State shared by run_test and the benchmark callbacks; it travels through
 * run_benchmark as the opaque void* argument. */
typedef struct {
24+
/* Setup once in advance */
25+
secp256k1_context* ctx; /* context whose ecmult_ctx is handed to the routine under test */
26+
secp256k1_scratch_space* scratch; /* scratch space handed to the routine under test */
27+
secp256k1_scalar* scalars; /* POINTS scalars produced by generate_scalar */
28+
secp256k1_ge* pubkeys; /* POINTS points; pubkeys[i] = 2^i * G */
29+
secp256k1_scalar* seckeys; /* seckeys[i] = 2^i, i.e. the discrete log of pubkeys[i] */
30+
secp256k1_gej* expected_output; /* negated expected results; allocated once, refilled by every run_test call */
31+
secp256k1_ecmult_multi_func ecmult_multi; /* multi-multiplication routine being benchmarked */
32+
33+
/* Changes per test */
34+
size_t count; /* number of terms per multiplication, counting the G term */
35+
int includes_g; /* non-zero: the G scalar is passed directly rather than through the callback */
36+
37+
/* Changes per test iteration */
38+
size_t offset1; /* rolling start index into scalars */
39+
size_t offset2; /* rolling start index into pubkeys */
40+
41+
/* Test output. */
42+
secp256k1_gej* output; /* one result per iteration; checked against expected_output in teardown */
43+
} bench_data;
44+
45+
/* Term provider for the multi-multiplication routine.
 * Writes the idx-th (scalar, point) pair into *sc / *ge and always reports
 * success. When the generator term is supplied separately (includes_g), the
 * callback indices are shifted up by one so that term 0 - the G term - is
 * never produced here. */
static int bench_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) {
    bench_data* bench = (bench_data*)arg;
    size_t term = bench->includes_g ? idx + 1 : idx;

    if (term != 0) {
        /* Ordinary term: scalar and point are taken from the rolling windows. */
        *sc = bench->scalars[(bench->offset1 + term) % POINTS];
        *ge = bench->pubkeys[(bench->offset2 + term - 1) % POINTS];
        return 1;
    }

    /* Term 0 is the generator, paired with the first scalar of the window. */
    *sc = bench->scalars[bench->offset1];
    *ge = secp256k1_ge_const_g;
    return 1;
}
57+
58+
static void bench_ecmult(void* arg) {
59+
bench_data* data = (bench_data*)arg;
60+
61+
size_t count = data->count;
62+
int includes_g = data->includes_g;
63+
size_t iters = 1 + ITERS / count;
64+
size_t iter;
65+
66+
for (iter = 0; iter < iters; ++iter) {
67+
data->ecmult_multi(&data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_callback, arg, count - includes_g);
68+
data->offset1 = (data->offset1 + count) % POINTS;
69+
data->offset2 = (data->offset2 + count - 1) % POINTS;
70+
}
71+
}
72+
73+
static void bench_ecmult_setup(void* arg) {
74+
bench_data* data = (bench_data*)arg;
75+
data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS;
76+
data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS;
77+
}
78+
79+
static void bench_ecmult_teardown(void* arg) {
80+
bench_data* data = (bench_data*)arg;
81+
size_t iters = 1 + ITERS / data->count;
82+
size_t iter;
83+
/* Verify the results in teardown, to avoid doing comparisons while benchmarking. */
84+
for (iter = 0; iter < iters; ++iter) {
85+
secp256k1_gej tmp;
86+
secp256k1_gej_add_var(&tmp, &data->output[iter], &data->expected_output[iter], NULL);
87+
CHECK(secp256k1_gej_is_infinity(&tmp));
88+
}
89+
}
90+
91+
/* Derive a deterministic scalar from num: SHA256 over the 11-byte string
 * "ecmult" || num (4 bytes, least significant first) || 0x00, interpreted as
 * a 32-byte scalar. Aborts via CHECK if the digest overflows the group order. */
static void generate_scalar(uint32_t num, secp256k1_scalar* scalar) {
    secp256k1_sha256 hasher;
    /* Eleven bytes are hashed; the last one is never written and stays zero. */
    unsigned char preimage[11] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0};
    unsigned char digest[32];
    int overflow = 0;
    int byte;

    for (byte = 0; byte < 4; ++byte) {
        preimage[6 + byte] = num >> (8 * byte);
    }

    secp256k1_sha256_initialize(&hasher);
    secp256k1_sha256_write(&hasher, preimage, sizeof(preimage));
    secp256k1_sha256_finalize(&hasher, digest);
    secp256k1_scalar_set_b32(scalar, digest, &overflow);
    CHECK(!overflow);
}
106+
107+
/* Benchmark multi-multiplications of count terms.
 * First precomputes, for each of the 1 + ITERS / count iterations, the
 * negation of the expected result using plain scalar arithmetic on the known
 * secret keys (one ecmult by G per iteration), then hands the timed loop to
 * run_benchmark under the name "ecmult_<count>" (with a trailing "g" when
 * includes_g is set). */
static void run_test(bench_data* data, size_t count, int includes_g) {
    static const secp256k1_scalar zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
    const size_t rounds = 1 + ITERS / count;
    char label[32];
    size_t round;

    data->count = count;
    data->includes_g = includes_g;

    /* Compute (the negation of) the expected results directly. */
    data->offset1 = (count * 0x537b7f6f + 0x8f66a481) % POINTS;
    data->offset2 = (count * 0x7f6f537b + 0x6a1a8f49) % POINTS;
    for (round = 0; round < rounds; ++round) {
        secp256k1_scalar product;
        /* The first scalar of each window multiplies G itself. */
        secp256k1_scalar sum = data->scalars[data->offset1 % POINTS];
        size_t remaining;
        data->offset1++;
        /* The other count - 1 terms contribute seckey * scalar each. */
        for (remaining = count; remaining > 1; --remaining) {
            const secp256k1_scalar* key = &data->seckeys[data->offset2 % POINTS];
            const secp256k1_scalar* factor = &data->scalars[data->offset1 % POINTS];
            data->offset2++;
            data->offset1++;
            secp256k1_scalar_mul(&product, key, factor);
            secp256k1_scalar_add(&sum, &sum, &product);
        }
        secp256k1_scalar_negate(&sum, &sum);
        secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->expected_output[round], NULL, &zero, &sum);
    }

    /* Run the benchmark. */
    if (includes_g) {
        sprintf(label, "ecmult_%ig", (int)count);
    } else {
        sprintf(label, "ecmult_%i", (int)count);
    }
    run_benchmark(label, bench_ecmult, bench_ecmult_setup, bench_ecmult_teardown, data, 10, count * rounds);
}
135+
136+
int main(int argc, char **argv) {
137+
bench_data data;
138+
int i, p;
139+
secp256k1_gej* pubkeys_gej;
140+
size_t scratch_size;
141+
142+
if (argc > 1) {
143+
if(have_flag(argc, argv, "pippenger_wnaf")) {
144+
printf("Using pippenger_wnaf:\n");
145+
data.ecmult_multi = secp256k1_ecmult_pippenger_batch_single;
146+
} else if(have_flag(argc, argv, "strauss_wnaf")) {
147+
printf("Using strauss_wnaf:\n");
148+
data.ecmult_multi = secp256k1_ecmult_strauss_batch_single;
149+
}
150+
} else {
151+
data.ecmult_multi = secp256k1_ecmult_multi_var;
152+
}
153+
154+
/* Allocate stuff */
155+
data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY);
156+
scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*16;
157+
data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size, scratch_size);
158+
data.scalars = malloc(sizeof(secp256k1_scalar) * POINTS);
159+
data.seckeys = malloc(sizeof(secp256k1_scalar) * POINTS);
160+
data.pubkeys = malloc(sizeof(secp256k1_ge) * POINTS);
161+
data.expected_output = malloc(sizeof(secp256k1_gej) * (ITERS + 1));
162+
data.output = malloc(sizeof(secp256k1_gej) * (ITERS + 1));
163+
164+
/* Generate a set of scalars, and private/public keypairs. */
165+
pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS);
166+
secp256k1_gej_set_ge(&pubkeys_gej[0], &secp256k1_ge_const_g);
167+
secp256k1_scalar_set_int(&data.seckeys[0], 1);
168+
for (i = 0; i < POINTS; ++i) {
169+
generate_scalar(i, &data.scalars[i]);
170+
if (i) {
171+
secp256k1_gej_double_var(&pubkeys_gej[i], &pubkeys_gej[i - 1], NULL);
172+
secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]);
173+
}
174+
}
175+
secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS, &data.ctx->error_callback);
176+
free(pubkeys_gej);
177+
178+
for (i = 1; i <= 8; ++i) {
179+
run_test(&data, i, 1);
180+
}
181+
182+
for (p = 0; p <= 11; ++p) {
183+
for (i = 9; i <= 16; ++i) {
184+
run_test(&data, i << p, 1);
185+
}
186+
}
187+
secp256k1_context_destroy(data.ctx);
188+
secp256k1_scratch_space_destroy(data.scratch);
189+
free(data.scalars);
190+
free(data.pubkeys);
191+
free(data.seckeys);
192+
free(data.output);
193+
free(data.expected_output);
194+
195+
return(0);
196+
}

src/bench_internal.c

-15
Original file line numberDiff line numberDiff line change
@@ -324,21 +324,6 @@ void bench_num_jacobi(void* arg) {
324324
}
325325
#endif
326326

327-
int have_flag(int argc, char** argv, char *flag) {
328-
char** argm = argv + argc;
329-
argv++;
330-
if (argv == argm) {
331-
return 1;
332-
}
333-
while (argv != NULL && argv != argm) {
334-
if (strcmp(*argv, flag) == 0) {
335-
return 1;
336-
}
337-
argv++;
338-
}
339-
return 0;
340-
}
341-
342327
int main(int argc, char **argv) {
343328
bench_inv data;
344329
if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, 2000000);

src/ecmult.h

+17-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**********************************************************************
2-
* Copyright (c) 2013, 2014 Pieter Wuille *
2+
* Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra *
33
* Distributed under the MIT software license, see the accompanying *
44
* file COPYING or http://www.opensource.org/licenses/mit-license.php.*
55
**********************************************************************/
@@ -9,6 +9,8 @@
99

1010
#include "num.h"
1111
#include "group.h"
12+
#include "scalar.h"
13+
#include "scratch.h"
1214

1315
typedef struct {
1416
/* For accelerating the computation of a*P + b*G: */
@@ -28,4 +30,18 @@ static int secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context *ctx
2830
/** Double multiply: R = na*A + ng*G */
2931
static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng);
3032

33+
typedef int (secp256k1_ecmult_multi_callback)(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *data);
34+
35+
/**
36+
* Multi-multiply: R = inp_g_sc * G + sum_i ni * Ai.
37+
* Chooses the right algorithm for a given number of points and scratch space
38+
* size. Resets and overwrites the given scratch space. If the points do not
39+
* fit in the scratch space the algorithm is repeatedly run with batches of
40+
* points.
41+
* Returns: 1 on success (including when inp_g_sc is NULL and n is 0)
42+
* 0 if there is not enough scratch space for a single point or
43+
* callback returns 0
44+
*/
45+
static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n);
46+
3147
#endif /* SECP256K1_ECMULT_H */

src/ecmult_const_impl.h

-7
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,6 @@
1212
#include "ecmult_const.h"
1313
#include "ecmult_impl.h"
1414

15-
#ifdef USE_ENDOMORPHISM
16-
#define WNAF_BITS 128
17-
#else
18-
#define WNAF_BITS 256
19-
#endif
20-
#define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w))
21-
2215
/* This is like `ECMULT_TABLE_GET_GE` but is constant time */
2316
#define ECMULT_CONST_TABLE_GET_GE(r,pre,n,w) do { \
2417
int m; \

0 commit comments

Comments
 (0)