Skip to content

Commit 68ee3fb

Browse files
committed
Revert "Remove unused scalar_sqr"
This reverts commit 5437e7b.
1 parent 8b96292 commit 68ee3fb

File tree

6 files changed

+286
-0
lines changed

6 files changed

+286
-0
lines changed

src/bench_internal.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,15 @@ void bench_scalar_negate(void* arg, int iters) {
9898
}
9999
}
100100

101+
void bench_scalar_sqr(void* arg, int iters) {
102+
int i;
103+
bench_inv *data = (bench_inv*)arg;
104+
105+
for (i = 0; i < iters; i++) {
106+
secp256k1_scalar_sqr(&data->scalar[0], &data->scalar[0]);
107+
}
108+
}
109+
101110
void bench_scalar_mul(void* arg, int iters) {
102111
int i;
103112
bench_inv *data = (bench_inv*)arg;
@@ -376,6 +385,7 @@ int main(int argc, char **argv) {
376385

377386
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, iters*100);
378387
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "negate")) run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, iters*100);
388+
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "sqr")) run_benchmark("scalar_sqr", bench_scalar_sqr, bench_setup, NULL, &data, 10, iters*10);
379389
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "mul")) run_benchmark("scalar_mul", bench_scalar_mul, bench_setup, NULL, &data, 10, iters*10);
380390
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "split")) run_benchmark("scalar_split", bench_scalar_split, bench_setup, NULL, &data, 10, iters);
381391
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse", bench_scalar_inverse, bench_setup, NULL, &data, 10, iters);

src/scalar.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a,
6565
* the low bits that were shifted off */
6666
static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n);
6767

68+
/** Compute the square of a scalar (modulo the group order). */
69+
static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a);
70+
6871
/** Compute the inverse of a scalar (modulo the group order). */
6972
static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *a);
7073

src/scalar_4x64_impl.h

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,28 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
224224
VERIFY_CHECK(c1 >= th); \
225225
}
226226

227+
/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
#define muladd2(a,b) { \
    uint64_t tl, th, th2, tl2; \
    { \
        uint128_t t = (uint128_t)a * b; \
        th = t >> 64;  /* at most 0xFFFFFFFFFFFFFFFE */ \
        tl = t; \
    } \
    th2 = th + th;     /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \
    c2 += (th2 < th);  /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
    tl2 = tl + tl;     /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \
    th2 += (tl2 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \
    c0 += tl2;         /* overflow is handled on the next line */ \
    th2 += (c0 < tl2); /* second overflow is handled on the next line */ \
    c2 += (c0 < tl2) & (th2 == 0); /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
    c1 += th2;         /* overflow is handled on the next line */ \
    c2 += (c1 < th2);  /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
}
248+
227249
/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
228250
#define sumadd(a) { \
229251
unsigned int over; \
@@ -733,10 +755,148 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
733755
#endif
734756
}
735757

758+
/** Compute l = a^2 as a 512-bit number (8 x 64-bit limbs, little-endian).
 *  The x86_64 path receives l in rsi ("S") and a->d in rdi ("D"); the
 *  portable path accumulates products a->d[i]*a->d[j] per output limb. */
static void secp256k1_scalar_sqr_512(uint64_t l[8], const secp256k1_scalar *a) {
#ifdef USE_ASM_X86_64
    __asm__ __volatile__(
    /* Preload */
    "movq 0(%%rdi), %%r11\n"
    "movq 8(%%rdi), %%r12\n"
    "movq 16(%%rdi), %%r13\n"
    "movq 24(%%rdi), %%r14\n"
    /* (rax,rdx) = a0 * a0 */
    "movq %%r11, %%rax\n"
    "mulq %%r11\n"
    /* Extract l0 */
    "movq %%rax, 0(%%rsi)\n"
    /* (r8,r9,r10) = (rdx,0) */
    "movq %%rdx, %%r8\n"
    "xorq %%r9, %%r9\n"
    "xorq %%r10, %%r10\n"
    /* (r8,r9,r10) += 2 * a0 * a1 */
    "movq %%r11, %%rax\n"
    "mulq %%r12\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* Extract l1 */
    "movq %%r8, 8(%%rsi)\n"
    "xorq %%r8, %%r8\n"
    /* (r9,r10,r8) += 2 * a0 * a2 */
    "movq %%r11, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* (r9,r10,r8) += a1 * a1 */
    "movq %%r12, %%rax\n"
    "mulq %%r12\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* Extract l2 */
    "movq %%r9, 16(%%rsi)\n"
    "xorq %%r9, %%r9\n"
    /* (r10,r8,r9) += 2 * a0 * a3 */
    "movq %%r11, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    /* (r10,r8,r9) += 2 * a1 * a2 */
    "movq %%r12, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    "adcq $0, %%r9\n"
    /* Extract l3 */
    "movq %%r10, 24(%%rsi)\n"
    "xorq %%r10, %%r10\n"
    /* (r8,r9,r10) += 2 * a1 * a3 */
    "movq %%r12, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* (r8,r9,r10) += a2 * a2 */
    "movq %%r13, %%rax\n"
    "mulq %%r13\n"
    "addq %%rax, %%r8\n"
    "adcq %%rdx, %%r9\n"
    "adcq $0, %%r10\n"
    /* Extract l4 */
    "movq %%r8, 32(%%rsi)\n"
    "xorq %%r8, %%r8\n"
    /* (r9,r10,r8) += 2 * a2 * a3 */
    "movq %%r13, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    "addq %%rax, %%r9\n"
    "adcq %%rdx, %%r10\n"
    "adcq $0, %%r8\n"
    /* Extract l5 */
    "movq %%r9, 40(%%rsi)\n"
    /* (r10,r8) += a3 * a3 */
    "movq %%r14, %%rax\n"
    "mulq %%r14\n"
    "addq %%rax, %%r10\n"
    "adcq %%rdx, %%r8\n"
    /* Extract l6 */
    "movq %%r10, 48(%%rsi)\n"
    /* Extract l7 */
    "movq %%r8, 56(%%rsi)\n"
    :
    : "S"(l), "D"(a->d)
    : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc", "memory");
#else
    /* 160 bit accumulator. */
    uint64_t c0 = 0, c1 = 0;
    uint32_t c2 = 0;

    /* l[0..7] = a[0..3]^2.  (Fixed: the old comment mentioned a nonexistent
     * operand `b`, copy-pasted from the mul_512 routine.)  muladd2 counts a
     * cross product twice; muladd/muladd_fast handle the diagonal terms. */
    muladd_fast(a->d[0], a->d[0]);
    extract_fast(l[0]);
    muladd2(a->d[0], a->d[1]);
    extract(l[1]);
    muladd2(a->d[0], a->d[2]);
    muladd(a->d[1], a->d[1]);
    extract(l[2]);
    muladd2(a->d[0], a->d[3]);
    muladd2(a->d[1], a->d[2]);
    extract(l[3]);
    muladd2(a->d[1], a->d[3]);
    muladd(a->d[2], a->d[2]);
    extract(l[4]);
    muladd2(a->d[2], a->d[3]);
    extract(l[5]);
    muladd_fast(a->d[3], a->d[3]);
    extract_fast(l[6]);
    VERIFY_CHECK(c1 == 0);
    l[7] = c0;
#endif
}
894+
736895
#undef sumadd
737896
#undef sumadd_fast
738897
#undef muladd
739898
#undef muladd_fast
899+
#undef muladd2
740900
#undef extract
741901
#undef extract_fast
742902

@@ -758,6 +918,12 @@ static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
758918
return ret;
759919
}
760920

921+
/** r = a^2 modulo the group order: widen to 512 bits, then reduce. */
static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a) {
    uint64_t wide[8];

    secp256k1_scalar_sqr_512(wide, a);
    secp256k1_scalar_reduce_512(r, wide);
}
926+
761927
static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
762928
r1->d[0] = k->d[0];
763929
r1->d[1] = k->d[1];

src/scalar_8x32_impl.h

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,28 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
306306
VERIFY_CHECK(c1 >= th); \
307307
}
308308

309+
/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
#define muladd2(a,b) { \
    uint32_t tl, th, th2, tl2; \
    { \
        uint64_t t = (uint64_t)a * b; \
        th = t >> 32;  /* at most 0xFFFFFFFE */ \
        tl = t; \
    } \
    th2 = th + th;     /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \
    c2 += (th2 < th);  /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((th2 >= th) || (c2 != 0)); \
    tl2 = tl + tl;     /* at most 0xFFFFFFFE (in case the lowest 31 bits of tl were 0x7FFFFFFF) */ \
    th2 += (tl2 < tl); /* at most 0xFFFFFFFF */ \
    c0 += tl2;         /* overflow is handled on the next line */ \
    th2 += (c0 < tl2); /* second overflow is handled on the next line */ \
    c2 += (c0 < tl2) & (th2 == 0); /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \
    c1 += th2;         /* overflow is handled on the next line */ \
    c2 += (c1 < th2);  /* never overflows by contract (verified the next line) */ \
    VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \
}
330+
309331
/** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
310332
#define sumadd(a) { \
311333
unsigned int over; \
@@ -569,10 +591,71 @@ static void secp256k1_scalar_mul_512(uint32_t *l, const secp256k1_scalar *a, con
569591
l[15] = c0;
570592
}
571593

594+
/** Compute l = a^2 as a 512-bit number (16 x 32-bit limbs, little-endian). */
static void secp256k1_scalar_sqr_512(uint32_t *l, const secp256k1_scalar *a) {
    /* 96 bit accumulator. */
    uint32_t c0 = 0, c1 = 0, c2 = 0;

    /* l[0..15] = a[0..7]^2.  Output limb l[k] collects every product
     * a->d[i]*a->d[j] with i + j == k: cross terms via muladd2 (added
     * twice), diagonal terms via muladd/muladd_fast (added once). */
    muladd_fast(a->d[0], a->d[0]);
    extract_fast(l[0]);
    muladd2(a->d[0], a->d[1]);
    extract(l[1]);
    muladd2(a->d[0], a->d[2]);
    muladd(a->d[1], a->d[1]);
    extract(l[2]);
    muladd2(a->d[0], a->d[3]);
    muladd2(a->d[1], a->d[2]);
    extract(l[3]);
    muladd2(a->d[0], a->d[4]);
    muladd2(a->d[1], a->d[3]);
    muladd(a->d[2], a->d[2]);
    extract(l[4]);
    muladd2(a->d[0], a->d[5]);
    muladd2(a->d[1], a->d[4]);
    muladd2(a->d[2], a->d[3]);
    extract(l[5]);
    muladd2(a->d[0], a->d[6]);
    muladd2(a->d[1], a->d[5]);
    muladd2(a->d[2], a->d[4]);
    muladd(a->d[3], a->d[3]);
    extract(l[6]);
    muladd2(a->d[0], a->d[7]);
    muladd2(a->d[1], a->d[6]);
    muladd2(a->d[2], a->d[5]);
    muladd2(a->d[3], a->d[4]);
    extract(l[7]);
    muladd2(a->d[1], a->d[7]);
    muladd2(a->d[2], a->d[6]);
    muladd2(a->d[3], a->d[5]);
    muladd(a->d[4], a->d[4]);
    extract(l[8]);
    muladd2(a->d[2], a->d[7]);
    muladd2(a->d[3], a->d[6]);
    muladd2(a->d[4], a->d[5]);
    extract(l[9]);
    muladd2(a->d[3], a->d[7]);
    muladd2(a->d[4], a->d[6]);
    muladd(a->d[5], a->d[5]);
    extract(l[10]);
    muladd2(a->d[4], a->d[7]);
    muladd2(a->d[5], a->d[6]);
    extract(l[11]);
    muladd2(a->d[5], a->d[7]);
    muladd(a->d[6], a->d[6]);
    extract(l[12]);
    muladd2(a->d[6], a->d[7]);
    extract(l[13]);
    muladd_fast(a->d[7], a->d[7]);
    extract_fast(l[14]);
    VERIFY_CHECK(c1 == 0);
    l[15] = c0;
}
653+
572654
#undef sumadd
573655
#undef sumadd_fast
574656
#undef muladd
575657
#undef muladd_fast
658+
#undef muladd2
576659
#undef extract
577660
#undef extract_fast
578661

@@ -598,6 +681,12 @@ static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
598681
return ret;
599682
}
600683

684+
/** r = a^2 modulo the group order: widen to 512 bits, then reduce. */
static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a) {
    uint32_t wide[16];

    secp256k1_scalar_sqr_512(wide, a);
    secp256k1_scalar_reduce_512(r, wide);
}
689+
601690
static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
602691
r1->d[0] = k->d[0];
603692
r1->d[1] = k->d[1];

src/scalar_low_impl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,10 @@ static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
105105
return ret;
106106
}
107107

108+
/** r = a^2 modulo the (small) exhaustive-test group order. */
static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a) {
    secp256k1_scalar v = *a;
    *r = (v * v) % EXHAUSTIVE_TEST_ORDER;
}
111+
108112
static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *a) {
109113
*r1 = *a;
110114
*r2 = 0;

src/tests.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1898,6 +1898,14 @@ void scalar_test(void) {
18981898
CHECK(secp256k1_scalar_eq(&r1, &r2));
18991899
}
19001900

1901+
{
1902+
/* Test square. */
1903+
secp256k1_scalar r1, r2;
1904+
secp256k1_scalar_sqr(&r1, &s1);
1905+
secp256k1_scalar_mul(&r2, &s1, &s1);
1906+
CHECK(secp256k1_scalar_eq(&r1, &r2));
1907+
}
1908+
19011909
{
19021910
/* Test multiplicative identity. */
19031911
secp256k1_scalar r1, v1;
@@ -2653,6 +2661,12 @@ void run_scalar_tests(void) {
26532661
CHECK(!secp256k1_scalar_check_overflow(&zz));
26542662
CHECK(secp256k1_scalar_eq(&one, &zz));
26552663
}
2664+
secp256k1_scalar_mul(&z, &x, &x);
2665+
CHECK(!secp256k1_scalar_check_overflow(&z));
2666+
secp256k1_scalar_sqr(&zz, &x);
2667+
CHECK(!secp256k1_scalar_check_overflow(&zz));
2668+
CHECK(secp256k1_scalar_eq(&zz, &z));
2669+
CHECK(secp256k1_scalar_eq(&r2, &zz));
26562670
}
26572671
}
26582672
}

0 commit comments

Comments
 (0)