added tomsfastmath-0.05

libtom · Jul 22, 2010 · a6c4c5a · a6c4c5a
1 parent f91cf2d
commit a6c4c5a
Show file tree

Hide file tree

Showing 21 changed files with 830 additions and 310 deletions.
diff --git a/changes.txt b/changes.txt
@@ -1,3 +1,10 @@
+August 1st, 2005
+0.05 -- Quick fix to the fp_invmod.c code to let it handle even moduli [required for LTC]
+     -- Added makefile.shared to make shared objects [required for LTC]
+     -- Improved makefiles to make them way more configurable
+     -- Added timing resistant fp_exptmod() enabled with TFM_TIMING_RESISTANT
+
+July 23rd, 2005
 0.04 -- Fixed bugs in the SSE2 squaring code
      -- Rewrote the multipliers to be optimized for small inputs 
      -- Nelson Bolyard of the NSS crew submitted [among other things] new faster Montgomery reduction

diff --git a/comba_mont_gen.c b/comba_mont_gen.c
@@ -1,59 +1,112 @@
-/* generate montgomery reductions for m->used = 1...16 */
-
 #include <stdio.h>
 
 int main(void)
 {
-   int N;
-
-   for (N = 1; N <= 16; N++) {
-
-printf("void fp_montgomery_reduce_%d(fp_int *a, fp_int *m, fp_digit mp)\n", N);
+   int x, y, z;
+
 printf(
+#if 0
+"#ifdef TFM_SMALL_SET\n"
+"/* computes x/R == x (mod N) via Montgomery Reduction */\n"
+"void fp_montgomery_reduce_small(fp_int *a, fp_int *m, fp_digit mp)\n"
 "{\n"
-"   fp_digit c[3*FP_SIZE], *_c, *tmpm, mu;\n"
-"   int      oldused, x, y;\n"
+"   fp_digit c[FP_SIZE], *_c, *tmpm, mu, cy;\n"
+"   int      oldused, x, y, pa;\n"
 "\n"
+"#if defined(USE_MEMSET)\n"
 "   /* now zero the buff */\n"
-"   memset(c, 0, sizeof(c));\n"
+"   memset(c, 0, sizeof c);\n"
+"#endif\n"
+"   pa = m->used;\n"
 "\n"
 "   /* copy the input */\n"
 "   oldused = a->used;\n"
 "   for (x = 0; x < oldused; x++) {\n"
 "       c[x] = a->dp[x];\n"
 "   }\n"
-"\n"
+"#if !defined(USE_MEMSET)\n"
+"   for (; x < 2*pa+3; x++) {\n"
+"       c[x] = 0;\n"
+"   }\n"
+"#endif\n"
 "   MONT_START;\n"
+#endif
 "\n"
-"   /* now let's get bizz-sy! */\n"
-"   for (x = 0; x < %d; x++) {\n"
-"       /* get Mu for this round */\n"
-"       LOOP_START;\n"
-"\n"
-"       /* our friendly neighbourhood alias */\n"
-"       _c   = c + x;\n"
-"       tmpm = m->dp;\n"
-"\n"
-"       for (y = 0; y < %d; y++) {\n"
-"          INNERMUL;\n"
-"          ++_c;\n"
-"       }\n"
-"       /* send carry up man... */\n"
-"       _c = c + x;\n"
-"       PROPCARRY;\n"
-"   }         \n"
-"\n"
-"  /* fix the rest of the carries */\n"
-"  _c = c + %d;\n"
-"  for (x = %d; x < %d * 2 + 2; x++) {\n"
-"     PROPCARRY;\n"
-"     ++_c;\n"
+"   switch (pa) {\n");
+/* Emit the "case N:" bodies that follow the "switch (pa) {" line printed
+   above.  x is the digit count of the case being generated: every value
+   1..16 is emitted, and above 16 only 32, 48 and 64, each of which is
+   wrapped in "#ifdef TFM_HUGE" ... "#endif".  For a given x the output
+   is x unrolled rounds (y = 0..x-1) followed by a single "break;".
+*/
+for (x = 1; x <= 64; x++) {
+if (x > 16 && (x != 32 && x != 48 && x != 64)) continue; /* skip the sizes above 16 that are not generated */
+if (x > 16) printf("#ifdef TFM_HUGE\n");
+
+
+
+printf("      case %d:\n", x);
+
+for (y = 0; y < x; y++) { /* one unrolled round per y */
+
+/* round prologue: the generated code sets its own x to y, clears cy and points _c at c + y */
+printf("            x = %d; cy   = 0;\n"
+       "            LOOP_START;\n"
+       "            _c   = c + %d;\n"
+       "            tmpm = m->dp;\n", y, y);
+
+/* INNERMUL8 variant: as many 8-wide steps as fit in x, then single INNERMUL steps for the remainder */
+printf("#ifdef INNERMUL8\n");
+for (z = 0; z+8 <= x; z += 8) {
+printf("            INNERMUL8; _c += 8; tmpm += 8;\n");
+}
+for (; z < x; z++) {
+printf("            INNERMUL; ++_c;\n");
+}
+/* fallback variant: x single INNERMUL steps */
+printf("#else\n");
+for (z = 0; z < x; z++) {
+printf("            INNERMUL; ++_c;\n");
+}
+printf("#endif\n");
+/* round epilogue: LOOP_END, then the generated code propagates while cy is non-zero */
+printf("            LOOP_END;\n"
+       "            while (cy) {\n"
+       "               PROPCARRY;\n"
+       "               ++_c;\n"
+       "            }\n");
+}
+//printf("         }\n");   (left disabled: no closing brace is printed per case)
+printf("         break;\n");
+
+
+/* NOTE(review): LOOP_MACRO below is only defined here; nothing in the
+   visible part of this file invokes it -- confirm whether it is still needed. */
+#define LOOP_MACRO(stride)                                 \
+   for (x = 0; x < stride; x++) {                          \
+       fp_digit cy = 0;                                    \
+       /* get Mu for this round */                         \
+       LOOP_START;                                         \
+       _c   = c + x;                                       \
+       tmpm = m->dp;                                       \
+       for (y = 0; y < stride; y++) {                      \
+          INNERMUL;                                        \
+          ++_c;                                            \
+       }                                                   \
+       LOOP_END;                                           \
+       while (cy) {                                        \
+           PROPCARRY;                                      \
+           ++_c;                                           \
+       }                                                   \
+  }         
+
+
+
+
+
+if (x > 16) printf("#endif /* TFM_HUGE */\n"); /* closes the #ifdef TFM_HUGE opened for this case */
+
+
+}
+
+#if 0
+
+printf(
 "  }\n"
-"\n"
 "  /* now copy out */\n"
-"  _c   = c + %d;\n"
+"  _c   = c + pa;\n"
 "  tmpm = a->dp;\n"
-"  for (x = 0; x < %d+1; x++) {\n"
+"  for (x = 0; x < pa+1; x++) {\n"
 "     *tmpm++ = *_c++;\n"
 "  }\n"
 "\n"
@@ -63,19 +116,17 @@ printf(
 "\n"
 "  MONT_FINI;\n"
 "\n"
-"  a->used = %d+1;\n"
+"  a->used = pa+1;\n"
 "  fp_clamp(a);\n"
 "\n"  
 "  /* if A >= m then A = A - m */\n"
 "  if (fp_cmp_mag (a, m) != FP_LT) {\n"
 "    s_fp_sub (a, m, a);\n"
 "  }\n"
-"}\n", N,N,N,N,N,N,N,N);
-}
-
-return 0;
-}
-
+"}\n\n#endif\n");
 
+#endif
 
 
+return 0;
+}
diff --git a/demo/test.c b/demo/test.c
@@ -213,7 +213,7 @@ t1 = TIMFUNC();
 sleep(1);
 printf("Ticks per second: %llu\n", TIMFUNC() - t1);
 
-goto expttime;
+goto multtime;
  /* do some timings... */
   printf("Addition:\n");
   for (t = 2; t <= FP_SIZE/2; t += 2) {

diff --git a/doc/tfm.pdf b/doc/tfm.pdf
diff --git a/fp_exptmod.c b/fp_exptmod.c
@@ -9,6 +9,75 @@
  */
 #include <tfm.h>
 
+#ifdef TFM_TIMING_RESISTANT
+
+/* timing resistant montgomery ladder based exptmod 
+
+   Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002
+
+   G is the base, X supplies the exponent digits (X->dp[0 .. X->used-1]),
+   P is the modulus and Y receives a copy of the final R[0]; on success Y
+   is meant to hold G**X mod P.
+
+   Returns the error code from fp_montgomery_setup() if that call fails,
+   otherwise FP_OKAY.  Any status produced by fp_mod() or fp_mulmod() is
+   not examined here.
+
+   Every exponent bit costs one fp_mul and one fp_sqr, each followed by
+   fp_montgomery_reduce(); the bit value only selects which of R[0]/R[1]
+   is the destination of each.  All DIGIT_BIT bits of every used digit of
+   X are walked, leading zero bits of the top digit included.
+*/
+static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
+{
+  fp_int   R[2];
+  fp_digit buf, mp;
+  int      err, bitcnt, digidx, y;
+
+  /* now setup montgomery (mp is handed to every fp_montgomery_reduce below) */
+  if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
+     return err;
+  }
+
+  fp_init(&R[0]);   
+  fp_init(&R[1]);   
+
+  /* now we need R mod m, kept in R[0] */
+  fp_montgomery_calc_normalization (&R[0], P);
+
+  /* now set R[1] to G * R mod m (R[0] already holds R mod m) */
+  if (fp_cmp_mag(P, G) != FP_GT) {
+     /* |G| >= |P| (P is not strictly larger) so we reduce G mod P first */
+     fp_mod(G, P, &R[1]);
+  } else {
+     fp_copy(G, &R[1]);
+  }
+  fp_mulmod (&R[1], &R[0], P, &R[1]);
+
+  /* ladder, for each exponent bit k taken most significant first:
+        R[!k] = R[0]*R[1]; R[k] = R[k]^2
+     with a montgomery reduction after each operation
+  */
+
+  /* set initial mode and bit cnt: bitcnt starts at 1 so the first
+     --bitcnt below reaches 0 and loads the top digit of X; digidx then
+     walks from the most significant used digit down to 0 */
+  bitcnt = 1;
+  buf    = 0;
+  digidx = X->used - 1;
+
+  for (;;) {
+    /* grab next digit as required */
+    if (--bitcnt == 0) {
+      /* if digidx == -1 we are out of digits so break
+         (this is also hit on the first pass when X->used is 0) */
+      if (digidx == -1) {
+        break;
+      }
+      /* read next digit and reset bitcnt */
+      buf    = X->dp[digidx--];
+      bitcnt = (int)DIGIT_BIT;
+    }
+
+    /* grab the next msb from the exponent, then shift it out of buf */
+    y     = (fp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
+    buf <<= (fp_digit)1;
+
+    /* do ops: the product goes to R[y^1] and R[y] is squared in place;
+       both lines run on every iteration whatever the value of y */
+    fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp);
+    fp_sqr(&R[y], &R[y]);          fp_montgomery_reduce(&R[y], P, mp);
+  }
+
+   /* one more reduction of R[0] (presumably to leave montgomery form -- confirm), then hand the result to Y */
+   fp_montgomery_reduce(&R[0], P, mp);
+   fp_copy(&R[0], Y);
+   return FP_OKAY;
+}   
+
+#else
+
 /* y = g**x (mod b) 
  * Some restrictions... x must be positive and < b
  */
@@ -168,6 +237,8 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
   return FP_OKAY;
 }
 
+#endif
+
 
 int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
 {