diff --git a/doc/speedup-batch.md b/doc/speedup-batch.md new file mode 100644 index 0000000000..c695639892 --- /dev/null +++ b/doc/speedup-batch.md @@ -0,0 +1,15 @@ +# Schnorrsig Batch Verification Speedup + +![Speedup over single verification](speedup-batch/schnorrsig-speedup-batch.png) + +# Tweak Pubkey Check Batch Verification Speedup + +![Speedup over single verification](speedup-batch/tweakcheck-speedup-batch.png) + +Build steps +----------- +To generate the above graphs on your local machine: + + $ cd doc/speedup-batch + $ make + $ make speedup-batch.png diff --git a/doc/speedup-batch/.gitignore b/doc/speedup-batch/.gitignore new file mode 100644 index 0000000000..773a6df9ba --- /dev/null +++ b/doc/speedup-batch/.gitignore @@ -0,0 +1 @@ +*.dat diff --git a/doc/speedup-batch/Makefile b/doc/speedup-batch/Makefile new file mode 100644 index 0000000000..a6a270d348 --- /dev/null +++ b/doc/speedup-batch/Makefile @@ -0,0 +1,23 @@ +schnorrsig_data = schnorrsig_batch.dat schnorrsig_single.dat +tweak_data = tweak_batch.dat tweak_single.dat + +bench_output.txt: bench.sh + SECP256K1_BENCH_ITERS=500000 ./bench.sh bench_output.txt + +schnorrsig_batch.dat: bench_output.txt + cat bench_output.txt | grep -v "schnorrsig_batch_verify_1 " | awk '{ gsub(/ /,""); print }' | awk -F, 'match($$0, /schnorrsig_batch_verify_([0-9]+)/, arr) {print arr[1] " " $$3}' > schnorrsig_batch.dat + +schnorrsig_single.dat: bench_output.txt + cat bench_output.txt | awk '{ gsub(/ /,""); print }' | awk -F, 'match($$0, /schnorrsig_verify/) {print $$3}' > schnorrsig_single.dat + +tweak_batch.dat: bench_output.txt + cat bench_output.txt | grep -v "tweak_check_batch_verify_1 " | awk '{ gsub(/ /,""); print }' | awk -F, 'match($$0, /tweak_check_batch_verify_([0-9]+)/, arr) {print arr[1] " " $$3}' > tweak_batch.dat + +tweak_single.dat: bench_output.txt + cat bench_output.txt | awk '{ gsub(/ /,""); print }' | awk -F, 'match($$0, /tweak_add_check/) {print $$3}' > tweak_single.dat + +speedup-batch.png: $(schnorrsig_data) $(tweak_data) plot.gp + gnuplot plot.gp + +clean: + rm *.log *.txt *.dat *.png diff --git a/doc/speedup-batch/bench.sh b/doc/speedup-batch/bench.sh new file mode 100755 index 0000000000..d38d45dceb --- /dev/null +++ b/doc/speedup-batch/bench.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +output_file=$1 +cur_dir=$(pwd) + +cd ../../ +echo "HEAD: $(git rev-parse --short HEAD)" > "$cur_dir/$output_file.log" +make clean +./autogen.sh +./configure --enable-experimental --enable-module-batch --enable-module-schnorrsig >> "$cur_dir/$output_file.log" +make -j +./bench schnorrsig > "$cur_dir/$output_file" +./bench extrakeys >> "$cur_dir/$output_file" \ No newline at end of file diff --git a/doc/speedup-batch/bench_output.txt b/doc/speedup-batch/bench_output.txt new file mode 100644 index 0000000000..9d257341c1 --- /dev/null +++ b/doc/speedup-batch/bench_output.txt @@ -0,0 +1,137 @@ +Benchmark , Min(us) , Avg(us) , Max(us) + +schnorrsig_sign , 50.4 , 50.5 , 50.7 +schnorrsig_verify , 89.1 , 89.2 , 89.3 +schnorrsig_batch_verify_1 , 104.0 , 104.0 , 104.0 +schnorrsig_batch_verify_2 , 89.0 , 89.1 , 89.1 +schnorrsig_batch_verify_3 , 84.1 , 84.1 , 84.1 +schnorrsig_batch_verify_4 , 81.5 , 81.5 , 81.5 +schnorrsig_batch_verify_5 , 79.9 , 79.9 , 79.9 +schnorrsig_batch_verify_7 , 78.0 , 78.1 , 78.3 +schnorrsig_batch_verify_9 , 77.0 , 77.0 , 77.1 +schnorrsig_batch_verify_11 , 76.2 , 76.3 , 76.3 +schnorrsig_batch_verify_14 , 75.6 , 75.6 , 75.6 +schnorrsig_batch_verify_17 , 75.2 , 75.2 , 75.2 +schnorrsig_batch_verify_21 , 74.8 , 74.8 , 74.8 +schnorrsig_batch_verify_26 , 74.5 , 74.6 , 74.9 +schnorrsig_batch_verify_32 , 74.3 , 74.5 , 74.7 +schnorrsig_batch_verify_39 , 74.1 , 74.1 , 74.1 +schnorrsig_batch_verify_47 , 73.9 , 73.9 , 73.9 +schnorrsig_batch_verify_57 , 74.5 , 74.5 , 74.5 +schnorrsig_batch_verify_69 , 74.3 , 74.3 , 74.5 +schnorrsig_batch_verify_83 , 74.1 , 74.1 , 74.2 +schnorrsig_batch_verify_100 , 73.9 , 74.0 , 74.1 +schnorrsig_batch_verify_121 , 74.1 , 74.1 , 74.2 +schnorrsig_batch_verify_146 , 73.9 , 73.9 , 74.0 +schnorrsig_batch_verify_176 , 74.0 , 74.2 , 74.5 +schnorrsig_batch_verify_212 , 73.9 , 74.1 , 74.1 +schnorrsig_batch_verify_255 , 74.0 , 74.0 , 74.1 +schnorrsig_batch_verify_307 , 73.9 , 74.0 , 74.1 +schnorrsig_batch_verify_369 , 73.9 , 73.9 , 73.9 +schnorrsig_batch_verify_443 , 73.9 , 74.1 , 74.3 +schnorrsig_batch_verify_532 , 74.0 , 74.0 , 74.1 +schnorrsig_batch_verify_639 , 73.9 , 74.0 , 74.0 +schnorrsig_batch_verify_767 , 73.9 , 73.9 , 73.9 +schnorrsig_batch_verify_921 , 74.0 , 74.0 , 74.1 +schnorrsig_batch_verify_1106 , 73.9 , 73.9 , 73.9 +schnorrsig_batch_verify_1328 , 73.9 , 74.1 , 74.2 +schnorrsig_batch_verify_1594 , 74.0 , 74.1 , 74.1 +schnorrsig_batch_verify_1913 , 74.0 , 74.0 , 74.0 +schnorrsig_batch_verify_2296 , 74.0 , 74.0 , 74.0 +schnorrsig_batch_verify_2756 , 73.9 , 74.0 , 74.1 +schnorrsig_batch_verify_3308 , 74.1 , 74.1 , 74.2 +schnorrsig_batch_verify_3970 , 74.1 , 74.2 , 74.4 +schnorrsig_batch_verify_4765 , 74.0 , 74.1 , 74.2 +schnorrsig_batch_verify_5719 , 74.0 , 74.1 , 74.1 +schnorrsig_batch_verify_6863 , 74.0 , 74.1 , 74.1 +schnorrsig_batch_verify_8236 , 74.0 , 74.1 , 74.1 +schnorrsig_batch_verify_9884 , 74.0 , 74.1 , 74.3 +schnorrsig_batch_verify_11861 , 74.0 , 74.0 , 74.1 +schnorrsig_batch_verify_14234 , 73.9 , 74.0 , 74.1 +schnorrsig_batch_verify_17081 , 73.9 , 73.9 , 73.9 +schnorrsig_batch_verify_20498 , 73.9 , 74.0 , 74.0 +schnorrsig_batch_verify_24598 , 73.9 , 74.0 , 74.1 +schnorrsig_batch_verify_29518 , 73.9 , 74.0 , 74.1 +schnorrsig_batch_verify_35422 , 73.9 , 73.9 , 73.9 +schnorrsig_batch_verify_42507 , 73.9 , 74.0 , 74.0 +schnorrsig_batch_verify_51009 , 73.9 , 74.1 , 74.3 +schnorrsig_batch_verify_61211 , 73.9 , 73.9 , 74.0 +schnorrsig_batch_verify_73454 , 73.9 , 74.0 , 74.3 +schnorrsig_batch_verify_88145 , 73.9 , 74.0 , 74.1 +schnorrsig_batch_verify_105775 , 74.0 , 74.1 , 74.1 +schnorrsig_batch_verify_126931 , 73.9 , 74.0 , 74.1 +schnorrsig_batch_verify_152318 , 73.9 , 73.9 , 74.0 +schnorrsig_batch_verify_182782 , 73.9 , 73.9 , 74.0 +schnorrsig_batch_verify_219339 , 73.9 , 73.9 , 74.0 +schnorrsig_batch_verify_263207 , 74.0 , 74.1 , 74.3 +schnorrsig_batch_verify_315849 , 73.9 , 74.0 , 74.0 +schnorrsig_batch_verify_379019 , 73.9 , 73.9 , 73.9 +schnorrsig_batch_verify_454823 , 74.0 , 74.0 , 74.0 +Benchmark , Min(us) , Avg(us) , Max(us) + +tweak_add_check , 64.7 , 64.7 , 65.0 +tweak_check_batch_verify_1 , 69.7 , 69.8 , 69.8 +tweak_check_batch_verify_2 , 57.2 , 57.2 , 57.3 +tweak_check_batch_verify_3 , 52.0 , 52.1 , 52.2 +tweak_check_batch_verify_4 , 49.4 , 49.5 , 49.5 +tweak_check_batch_verify_5 , 47.9 , 47.9 , 47.9 +tweak_check_batch_verify_7 , 46.1 , 46.1 , 46.2 +tweak_check_batch_verify_9 , 45.2 , 45.2 , 45.4 +tweak_check_batch_verify_11 , 44.5 , 44.6 , 44.6 +tweak_check_batch_verify_14 , 43.9 , 43.9 , 43.9 +tweak_check_batch_verify_17 , 43.5 , 43.5 , 43.5 +tweak_check_batch_verify_21 , 43.1 , 43.1 , 43.1 +tweak_check_batch_verify_26 , 42.8 , 42.8 , 42.8 +tweak_check_batch_verify_32 , 42.5 , 42.6 , 42.6 +tweak_check_batch_verify_39 , 42.3 , 42.4 , 42.4 +tweak_check_batch_verify_47 , 42.2 , 42.2 , 42.2 +tweak_check_batch_verify_57 , 42.1 , 42.2 , 42.3 +tweak_check_batch_verify_69 , 42.0 , 42.1 , 42.1 +tweak_check_batch_verify_83 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_100 , 41.8 , 41.9 , 41.9 +tweak_check_batch_verify_121 , 42.1 , 42.1 , 42.1 +tweak_check_batch_verify_146 , 42.0 , 42.0 , 42.0 +tweak_check_batch_verify_176 , 41.9 , 41.9 , 42.0 +tweak_check_batch_verify_212 , 41.8 , 41.9 , 41.9 +tweak_check_batch_verify_255 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_307 , 41.8 , 41.9 , 41.9 +tweak_check_batch_verify_369 , 41.9 , 42.0 , 42.1 +tweak_check_batch_verify_443 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_532 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_639 , 41.9 , 41.9 , 42.0 +tweak_check_batch_verify_767 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_921 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_1106 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_1328 , 41.9 , 41.9 , 42.0 +tweak_check_batch_verify_1594 , 41.9 , 41.9 , 42.0 +tweak_check_batch_verify_1913 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_2296 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_2756 , 41.8 , 41.9 , 41.9 +tweak_check_batch_verify_3308 , 41.9 , 41.9 , 42.0 +tweak_check_batch_verify_3970 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_4765 , 41.8 , 41.9 , 41.9 +tweak_check_batch_verify_5719 , 41.9 , 42.0 , 42.1 +tweak_check_batch_verify_6863 , 42.0 , 42.0 , 42.0 +tweak_check_batch_verify_8236 , 42.0 , 42.0 , 42.0 +tweak_check_batch_verify_9884 , 41.9 , 41.9 , 42.0 +tweak_check_batch_verify_11861 , 41.9 , 42.0 , 42.1 +tweak_check_batch_verify_14234 , 41.9 , 42.0 , 42.0 +tweak_check_batch_verify_17081 , 41.8 , 41.9 , 41.9 +tweak_check_batch_verify_20498 , 41.8 , 41.9 , 41.9 +tweak_check_batch_verify_24598 , 41.8 , 41.9 , 41.9 +tweak_check_batch_verify_29518 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_35422 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_42507 , 41.8 , 41.8 , 41.9 +tweak_check_batch_verify_51009 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_61211 , 41.8 , 41.8 , 41.8 +tweak_check_batch_verify_73454 , 41.8 , 42.0 , 42.2 +tweak_check_batch_verify_88145 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_105775 , 41.8 , 41.8 , 41.8 +tweak_check_batch_verify_126931 , 41.8 , 41.9 , 41.9 +tweak_check_batch_verify_152318 , 41.8 , 41.9 , 42.0 +tweak_check_batch_verify_182782 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_219339 , 41.9 , 42.0 , 42.0 +tweak_check_batch_verify_263207 , 41.9 , 42.0 , 42.1 +tweak_check_batch_verify_315849 , 41.9 , 41.9 , 41.9 +tweak_check_batch_verify_379019 , 41.9 , 41.9 , 42.0 +tweak_check_batch_verify_454823 , 41.9 , 41.9 , 41.9 diff --git a/doc/speedup-batch/bench_output.txt.log b/doc/speedup-batch/bench_output.txt.log new file mode 100644 index 0000000000..c289c4aab2 --- /dev/null +++ b/doc/speedup-batch/bench_output.txt.log @@ -0,0 +1,127 @@ +HEAD: 6ddb0d0c +checking build system type... x86_64-pc-linux-gnu +checking host system type... x86_64-pc-linux-gnu +checking for a BSD-compatible install... /usr/bin/install -c +checking whether build environment is sane... yes +checking for a thread-safe mkdir -p... /usr/bin/mkdir -p +checking for gawk... gawk +checking whether make sets $(MAKE)... yes +checking whether make supports nested variables... yes +checking whether make supports nested variables... (cached) yes +checking for gcc... gcc +checking whether the C compiler works... yes +checking for C compiler default output file name... a.out +checking for suffix of executables... +checking whether we are cross compiling... no +checking for suffix of object files... o +checking whether we are using the GNU C compiler... yes +checking whether gcc accepts -g... yes +checking for gcc option to accept ISO C89... none needed +checking whether gcc understands -c and -o together... yes +checking whether make supports the include directive... yes (GNU style) +checking dependency style of gcc... gcc3 +checking dependency style of gcc... gcc3 +checking for ar... ar +checking the archiver (ar) interface... ar +checking how to print strings... printf +checking for a sed that does not truncate output... /usr/bin/sed +checking for grep that handles long lines and -e... /usr/bin/grep +checking for egrep... /usr/bin/grep -E +checking for fgrep... /usr/bin/grep -F +checking for ld used by gcc... /usr/bin/ld +checking if the linker (/usr/bin/ld) is GNU ld... yes +checking for BSD- or MS-compatible name lister (nm)... /usr/bin/nm -B +checking the name lister (/usr/bin/nm -B) interface... BSD nm +checking whether ln -s works... yes +checking the maximum length of command line arguments... 1572864 +checking how to convert x86_64-pc-linux-gnu file names to x86_64-pc-linux-gnu format... func_convert_file_noop +checking how to convert x86_64-pc-linux-gnu file names to toolchain format... func_convert_file_noop +checking for /usr/bin/ld option to reload object files... -r +checking for objdump... objdump +checking how to recognize dependent libraries... pass_all +checking for dlltool... no +checking how to associate runtime and link libraries... printf %s\n +checking for archiver @FILE support... @ +checking for strip... strip +checking for ranlib... ranlib +checking command to parse /usr/bin/nm -B output from gcc object... ok +checking for sysroot... no +checking for a working dd... /usr/bin/dd +checking how to truncate binary pipes... /usr/bin/dd bs=4096 count=1 +checking for mt... mt +checking if mt is a manifest tool... no +checking how to run the C preprocessor... gcc -E +checking for ANSI C header files... yes +checking for sys/types.h... yes +checking for sys/stat.h... yes +checking for stdlib.h... yes +checking for string.h... yes +checking for memory.h... yes +checking for strings.h... yes +checking for inttypes.h... yes +checking for stdint.h... yes +checking for unistd.h... yes +checking for dlfcn.h... yes +checking for objdir... .libs +checking if gcc supports -fno-rtti -fno-exceptions... no +checking for gcc option to produce PIC... -fPIC -DPIC +checking if gcc PIC flag -fPIC -DPIC works... yes +checking if gcc static flag -static works... yes +checking if gcc supports -c -o file.o... yes +checking if gcc supports -c -o file.o... (cached) yes +checking whether the gcc linker (/usr/bin/ld -m elf_x86_64) supports shared libraries... yes +checking whether -lc should be explicitly linked in... no +checking dynamic linker characteristics... GNU/Linux ld.so +checking how to hardcode library paths into programs... immediate +checking whether stripping libraries is possible... yes +checking if libtool supports shared libraries... yes +checking whether to build shared libraries... yes +checking whether to build static libraries... yes +checking if gcc supports -Werror=unknown-warning-option... no +checking if gcc supports -std=c89 -pedantic -Wno-long-long -Wnested-externs -Wshadow -Wstrict-prototypes -Wundef... yes +checking if gcc supports -Wno-overlength-strings... yes +checking if gcc supports -Wall... yes +checking if gcc supports -Wno-unused-function... yes +checking if gcc supports -Wextra... yes +checking if gcc supports -Wcast-align... yes +checking if gcc supports -Wcast-align=strict... yes +checking if gcc supports -Wconditional-uninitialized... no +checking if gcc supports -fvisibility=hidden... yes +checking for valgrind support... yes +checking for x86_64 assembly availability... yes +configure: ****** +configure: WARNING: experimental build +configure: Experimental features do not have stable APIs or properties, and may not be safe for production use. +configure: Building batch verification module: yes +configure: ****** +checking that generated files are newer than configure... done +configure: creating ./config.status +config.status: creating Makefile +config.status: creating libsecp256k1.pc +config.status: creating src/libsecp256k1-config.h +config.status: src/libsecp256k1-config.h is unchanged +config.status: executing depfiles commands +config.status: executing libtool commands + +Build Options: + with external callbacks = no + with benchmarks = yes + with tests = yes + with coverage = no + with examples = no + module ecdh = no + module recovery = no + module extrakeys = yes + module schnorrsig = yes + module batch = yes + + asm = x86_64 + ecmult window size = 15 + ecmult gen prec. bits = 4 + + valgrind = yes + CC = gcc + CPPFLAGS = + SECP_CFLAGS = -O2 -std=c89 -pedantic -Wno-long-long -Wnested-externs -Wshadow -Wstrict-prototypes -Wundef -Wno-overlength-strings -Wall -Wno-unused-function -Wextra -Wcast-align -Wcast-align=strict -fvisibility=hidden + CFLAGS = -g -O2 + LDFLAGS = diff --git a/doc/speedup-batch/plot.gp b/doc/speedup-batch/plot.gp new file mode 100644 index 0000000000..7960ca0fd0 --- /dev/null +++ b/doc/speedup-batch/plot.gp @@ -0,0 +1,41 @@ +set style line 80 lt rgb "#808080" +set style line 81 lt 0 +set style line 81 lt rgb "#808080" +set grid back linestyle 81 +set border 3 back linestyle 80 +set xtics nomirror +set ytics nomirror +set style line 1 lt rgb "#A00000" lw 2 pt 1 +set style line 2 lt rgb "#00A000" lw 2 pt 6 +set style line 3 lt rgb "#5060D0" lw 2 pt 2 +set style line 4 lt rgb "#F25900" lw 2 pt 9 +set key bottom right +set autoscale +unset log +unset label +set xtic auto +set ytic auto +set title "Batch signature verification in libsecp256k1" +set xlabel "Number of signatures (logarithmic)" +set ylabel "Verification time per signature in us" +set grid +set logscale x +set mxtics 10 + +# Generate graph of Schnorr signature benchmark +schnorrsig_single_val=system("cat schnorrsig_single.dat") +set xrange [1.1:] +set xtics add ("2" 2) +set yrange [0.9:] +set ytics -1,0.1,3 +set ylabel "Speedup over single verification" +set term png size 800,600 +set output 'schnorrsig-speedup-batch.png' +plot "schnorrsig_batch.dat" using 1:(schnorrsig_single_val/$2) with points title "" ls 1 + +# Generate graph of tweaked x-only pubkey check benchmark +set title "Batch tweaked x-only pubkey check in libsecp256k1" +set xlabel "Number of tweak checks (logarithmic)" +tweak_single_val=system("cat tweak_single.dat") +set output 'tweakcheck-speedup-batch.png' +plot "tweak_batch.dat" using 1:(tweak_single_val/$2) with points title "" ls 1 diff --git a/doc/speedup-batch/schnorrsig-speedup-batch.png b/doc/speedup-batch/schnorrsig-speedup-batch.png new file mode 100644 index 0000000000..536b831503 Binary files /dev/null and b/doc/speedup-batch/schnorrsig-speedup-batch.png differ diff --git a/doc/speedup-batch/tweakcheck-speedup-batch.png b/doc/speedup-batch/tweakcheck-speedup-batch.png new file mode 100644 index 0000000000..f12e6e273f Binary files /dev/null and b/doc/speedup-batch/tweakcheck-speedup-batch.png differ diff --git a/src/modules/batch/main_impl.h b/src/modules/batch/main_impl.h index 2e8f5b0aa0..81badd5b3b 100644 --- a/src/modules/batch/main_impl.h +++ b/src/modules/batch/main_impl.h @@ -3,6 +3,10 @@ #include "include/secp256k1_batch.h" +/* Maximum number of scalar-point pairs on the batch + * for which `secp256k1_batch_verify` remains efficient */ +#define STRAUSS_MAX_TERMS_PER_BATCH 106 + /* Assume two batch objects (batch1 and batch2) and we call * `batch_add_tweak_check` on batch1 and `batch_add_schnorrsig` on batch2. * In this case, the same randomizer will be generated if the input bytes to @@ -102,6 +106,10 @@ secp256k1_batch* secp256k1_batch_create(const secp256k1_context* ctx, size_t max secp256k1_batch* batch; size_t batch_scratch_size; unsigned char zeros[16] = {0}; + /* max number of scalar-point pairs on scratch up to which Strauss multi multiplication is efficient */ + if (max_terms > STRAUSS_MAX_TERMS_PER_BATCH) { + max_terms = STRAUSS_MAX_TERMS_PER_BATCH; + } VERIFY_CHECK(ctx != NULL); ARG_CHECK(max_terms != 0);