Skip to content

Commit

Permalink
added tomsfastmath-0.05
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom St Denis authored and sjaeckel committed Jul 22, 2010
1 parent f91cf2d commit a6c4c5a
Show file tree
Hide file tree
Showing 21 changed files with 830 additions and 310 deletions.
7 changes: 7 additions & 0 deletions changes.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
August 1st, 2005
0.05 -- Quick fix to the fp_invmod.c code to let it handle even moduli [required for LTC]
-- Added makefile.shared to make shared objects [required for LTC]
-- Improved makefiles to make them way more configurable
-- Added timing resistant fp_exptmod() enabled with TFM_TIMING_RESISTANT

July 23rd, 2005
0.04 -- Fixed bugs in the SSE2 squaring code
-- Rewrote the multipliers to be optimized for small inputs
-- Nelson Bolyard of the NSS crew submitted [among other things] new faster Montgomery reduction
Expand Down
139 changes: 95 additions & 44 deletions comba_mont_gen.c
Original file line number Diff line number Diff line change
@@ -1,59 +1,112 @@
/* generate montgomery reductions for m->used = 1...16 */

#include <stdio.h>

int main(void)
{
int N;

for (N = 1; N <= 16; N++) {

printf("void fp_montgomery_reduce_%d(fp_int *a, fp_int *m, fp_digit mp)\n", N);
int x, y, z;

printf(
#if 0
"#ifdef TFM_SMALL_SET\n"
"/* computes x/R == x (mod N) via Montgomery Reduction */\n"
"void fp_montgomery_reduce_small(fp_int *a, fp_int *m, fp_digit mp)\n"
"{\n"
" fp_digit c[3*FP_SIZE], *_c, *tmpm, mu;\n"
" int oldused, x, y;\n"
" fp_digit c[FP_SIZE], *_c, *tmpm, mu, cy;\n"
" int oldused, x, y, pa;\n"
"\n"
"#if defined(USE_MEMSET)\n"
" /* now zero the buff */\n"
" memset(c, 0, sizeof(c));\n"
" memset(c, 0, sizeof c);\n"
"#endif\n"
" pa = m->used;\n"
"\n"
" /* copy the input */\n"
" oldused = a->used;\n"
" for (x = 0; x < oldused; x++) {\n"
" c[x] = a->dp[x];\n"
" }\n"
"\n"
"#if !defined(USE_MEMSET)\n"
" for (; x < 2*pa+3; x++) {\n"
" c[x] = 0;\n"
" }\n"
"#endif\n"
" MONT_START;\n"
#endif
"\n"
" /* now let's get bizz-sy! */\n"
" for (x = 0; x < %d; x++) {\n"
" /* get Mu for this round */\n"
" LOOP_START;\n"
"\n"
" /* our friendly neighbourhood alias */\n"
" _c = c + x;\n"
" tmpm = m->dp;\n"
"\n"
" for (y = 0; y < %d; y++) {\n"
" INNERMUL;\n"
" ++_c;\n"
" }\n"
" /* send carry up man... */\n"
" _c = c + x;\n"
" PROPCARRY;\n"
" } \n"
"\n"
" /* fix the rest of the carries */\n"
" _c = c + %d;\n"
" for (x = %d; x < %d * 2 + 2; x++) {\n"
" PROPCARRY;\n"
" ++_c;\n"
" switch (pa) {\n");

for (x = 1; x <= 64; x++) {
if (x > 16 && (x != 32 && x != 48 && x != 64)) continue;
if (x > 16) printf("#ifdef TFM_HUGE\n");



printf(" case %d:\n", x);

for (y = 0; y < x; y++) {

printf(" x = %d; cy = 0;\n"
" LOOP_START;\n"
" _c = c + %d;\n"
" tmpm = m->dp;\n", y, y);

printf("#ifdef INNERMUL8\n");
for (z = 0; z+8 <= x; z += 8) {
printf(" INNERMUL8; _c += 8; tmpm += 8;\n");
}
for (; z < x; z++) {
printf(" INNERMUL; ++_c;\n");
}
printf("#else\n");
for (z = 0; z < x; z++) {
printf(" INNERMUL; ++_c;\n");
}
printf("#endif\n");
printf(" LOOP_END;\n"
" while (cy) {\n"
" PROPCARRY;\n"
" ++_c;\n"
" }\n");
}
//printf(" }\n");
printf(" break;\n");



/* LOOP_MACRO(stride): expands to a loop that runs x over 0 .. stride-1.
 * Each round starts with a fresh local carry cy = 0, invokes LOOP_START,
 * aims _c at c + x and tmpm at m->dp, invokes INNERMUL stride times
 * (advancing _c after each one), invokes LOOP_END, and then keeps invoking
 * PROPCARRY (advancing _c each time) for as long as cy is non-zero.
 *
 * LOOP_START, INNERMUL, LOOP_END and PROPCARRY are not defined in this part
 * of the file; the expansion also relies on x, y, _c, c, tmpm and m being in
 * scope wherever the macro is used.
 * NOTE(review): no use of LOOP_MACRO is visible in the surrounding code --
 * confirm whether it is still needed.
 */
#define LOOP_MACRO(stride) \
for (x = 0; x < stride; x++) { \
fp_digit cy = 0; \
/* get Mu for this round */ \
LOOP_START; \
_c = c + x; \
tmpm = m->dp; \
for (y = 0; y < stride; y++) { \
INNERMUL; \
++_c; \
} \
LOOP_END; \
while (cy) { \
PROPCARRY; \
++_c; \
} \
}





if (x > 16) printf("#endif /* TFM_HUGE */\n");


}

#if 0

printf(
" }\n"
"\n"
" /* now copy out */\n"
" _c = c + %d;\n"
" _c = c + pa;\n"
" tmpm = a->dp;\n"
" for (x = 0; x < %d+1; x++) {\n"
" for (x = 0; x < pa+1; x++) {\n"
" *tmpm++ = *_c++;\n"
" }\n"
"\n"
Expand All @@ -63,19 +116,17 @@ printf(
"\n"
" MONT_FINI;\n"
"\n"
" a->used = %d+1;\n"
" a->used = pa+1;\n"
" fp_clamp(a);\n"
"\n"
" /* if A >= m then A = A - m */\n"
" if (fp_cmp_mag (a, m) != FP_LT) {\n"
" s_fp_sub (a, m, a);\n"
" }\n"
"}\n", N,N,N,N,N,N,N,N);
}

return 0;
}

"}\n\n#endif\n");

#endif


return 0;
}
2 changes: 1 addition & 1 deletion demo/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ t1 = TIMFUNC();
sleep(1);
printf("Ticks per second: %llu\n", TIMFUNC() - t1);

goto expttime;
goto multtime;
/* do some timings... */
printf("Addition:\n");
for (t = 2; t <= FP_SIZE/2; t += 2) {
Expand Down
Binary file modified doc/tfm.pdf
Binary file not shown.
71 changes: 71 additions & 0 deletions fp_exptmod.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,75 @@
*/
#include <tfm.h>

#ifdef TFM_TIMING_RESISTANT

/* Timing-resistant modular exponentiation built on the Montgomery powering
 * ladder: every exponent bit costs exactly one multiply and one square,
 * whichever value the bit has.
 *
 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
 * Cryptographic Hardware and Embedded Systems, CHES 2002
 *
 * G  base
 * X  exponent; its digits are scanned from the most significant one down
 * P  modulus, handed to fp_montgomery_setup()
 * Y  receives the result
 *
 * Returns the error code from fp_montgomery_setup() when that call fails,
 * FP_OKAY otherwise.
 */
static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
{
   fp_int   ladder[2];
   fp_digit word, rho;
   int      status, bits_left, idx, bit;

   /* montgomery setup comes first; nothing else is touched if it fails */
   status = fp_montgomery_setup(P, &rho);
   if (status != FP_OKAY) {
      return status;
   }

   fp_init(&ladder[0]);
   fp_init(&ladder[1]);

   /* ladder[0] = R mod P */
   fp_montgomery_calc_normalization(&ladder[0], P);

   /* ladder[1] = G * R mod P; G is reduced beforehand unless P is already
    * larger than G in magnitude
    */
   if (fp_cmp_mag(P, G) == FP_GT) {
      fp_copy(G, &ladder[1]);
   } else {
      fp_mod(G, P, &ladder[1]);
   }
   fp_mulmod(&ladder[1], &ladder[0], P, &ladder[1]);

   /* walk the exponent one digit at a time, top digit first */
   for (idx = X->used - 1; idx != -1; --idx) {
      word = X->dp[idx];

      /* consume all DIGIT_BIT bits of this digit, most significant first */
      for (bits_left = (int)DIGIT_BIT; bits_left != 0; --bits_left) {
         /* peel off the current top bit and shift the next one into place */
         bit    = (fp_digit)(word >> (DIGIT_BIT - 1)) & 1;
         word <<= (fp_digit)1;

         /* ladder[!bit] = ladder[0] * ladder[1] must be formed before
          * ladder[bit] is overwritten by its own square
          */
         fp_mul(&ladder[0], &ladder[1], &ladder[bit ^ 1]);
         fp_montgomery_reduce(&ladder[bit ^ 1], P, rho);
         fp_sqr(&ladder[bit], &ladder[bit]);
         fp_montgomery_reduce(&ladder[bit], P, rho);
      }
   }

   /* one last montgomery reduction of ladder[0] gives the value stored in Y */
   fp_montgomery_reduce(&ladder[0], P, rho);
   fp_copy(&ladder[0], Y);
   return FP_OKAY;
}

#else

/* y = g**x (mod b)
* Some restrictions... x must be positive and < b
*/
Expand Down Expand Up @@ -168,6 +237,8 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
return FP_OKAY;
}

#endif


int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
{
Expand Down
Loading

0 comments on commit a6c4c5a

Please sign in to comment.