From e71e1b81da930755dca6fc9be401f14abed455e0 Mon Sep 17 00:00:00 2001
From: Jimmy Song <jaejoon@gmail.com>
Date: Thu, 22 Jan 2015 10:13:00 -0600
Subject: [PATCH] Optimize ScalarMult with NAF

Use Non-Adjacent Form (NAF) of large numbers to reduce ScalarMult computation times.

Preliminary results indicate around an 8-9% speed improvement according to BenchmarkScalarMult.

The algorithm used is 3.77 from Guide to Elliptic Curve Cryptography by Hankerson, et al.

This closes #3
---
 bench_test.go |   8 +++
 btcec.go      | 146 +++++++++++++++++++++++++++++++++++++-------------
 btcec_test.go |  30 +++++++++++
 3 files changed, 148 insertions(+), 36 deletions(-)

diff --git a/bench_test.go b/bench_test.go
index e306ef4..e406570 100644
--- a/bench_test.go
+++ b/bench_test.go
@@ -78,6 +78,14 @@ func BenchmarkScalarMult(b *testing.B) {
 	}
 }
 
+// BenchmarkNAF benchmarks converting a fixed 256-bit scalar to NAF form.
+func BenchmarkNAF(b *testing.B) {
+	k := fromHex("d74bf844b0862475103d96a611cf2d898447e288d34b360bc885cb8ce7c00575")
+	for i := 0; i < b.N; i++ {
+		btcec.NAF(k.Bytes())
+	}
+}
+
 // BenchmarkSigVerify benchmarks how long it takes the secp256k1 curve to
 // verify signatures.
 func BenchmarkSigVerify(b *testing.B) {
diff --git a/btcec.go b/btcec.go
index 233ab4f..846b51c 100644
--- a/btcec.go
+++ b/btcec.go
@@ -665,6 +665,74 @@ func (curve *KoblitzCurve) moduloReduce(k []byte) []byte {
 	}
 }
 
+// NAF takes a positive integer k, given as big-endian bytes, and returns its
+// Non-Adjacent Form (NAF): a most-significant-first list of digits that are
+// each 1, 0 or -1, with no two adjacent digits both non-zero. The returned
+// slice always has len(k)*8+1 entries, so leading zeros are not trimmed.
+// The algorithm follows Guide to Elliptic Curve Cryptography, Algorithm 3.30.
+// On average only about 1/3rd of the digits are non-zero, which reduces the
+// number of operations a caller has to perform per digit.
+func NAF(k []byte) []int {
+
+	// Expand k into one int per bit, most significant bit first, with an
+	// extra leading 0: 0x57 => [0 0 1 0 1 0 1 1 1] (9 entries for 8 bits).
+	// The leading 0 gives the final carry somewhere to land, because the
+	// NAF of a number can be one digit longer than its binary form.
+	bits := make([]int, len(k)*8+1)
+	lenBits := len(bits)
+	for i, byteVal := range k {
+		for j := 7; j >= 0; j-- {
+			if byteVal&1 == 1 {
+				bits[8*i+j+1] = 1
+			}
+			byteVal >>= 1
+		}
+	}
+
+	// A run of consecutive 1s is replaced by a -1 at the run's lowest bit,
+	// 0s across the rest of the run, and a 1 just above it, per the identity:
+	// 2^n + 2^(n-1) + 2^(n-2) + ... + 2^(n-k) = 2^(n+1) - 2^(n-k)
+	// carry is true while we are inside such a run and still owe the +1 above
+	// it. That +1 can land one position past the top bit of k, which is why
+	// bits and ret are one entry longer than k's bit length.
+	// We walk from the end of the slice because the lowest bit is stored last.
+	var carry, nextIsOne bool
+	ret := make([]int, len(k)*8+1)
+	for i := lenBits - 1; i >= 0; i-- {
+		bit := bits[i]
+		nextIsOne = i > 0 && bits[i-1] == 1
+		if carry {
+			if bit == 0 {
+				// A 0 ends the current run of 1s; the carry turns it into a 1.
+				if nextIsOne {
+					// That 1 sits directly below another 1, so it starts
+					// a new run: emit -1 and keep carrying.
+					ret[i] = -1
+				} else {
+					// The carry is absorbed here: emit 1 and stop carrying.
+					carry = false
+					ret[i] = 1
+				}
+			} else {
+				// 1 plus the carry is 0 with the carry kept, and ret[i]
+				// is already 0, so there is nothing to write.
+			}
+		} else if bit == 1 {
+			if nextIsOne {
+				// This 1 begins a run of at least two 1s: emit -1 for
+				// the run's lowest bit and start carrying.
+				ret[i] = -1
+				carry = true
+			} else {
+				// A lone 1 (no 1 directly above it) is kept as is.
+				ret[i] = 1
+			}
+		}
+	}
+
+	return ret
+}
+
 // ScalarMult returns k*(Bx, By) where k is a big endian integer.
 // Part of the elliptic.Curve interface.
 func (curve *KoblitzCurve) ScalarMult(Bx, By *big.Int, k []byte) (*big.Int, *big.Int) {
@@ -675,66 +743,72 @@ func (curve *KoblitzCurve) ScalarMult(Bx, By *big.Int, k []byte) (*big.Int, *big
 	// see Algorithm 3.74 in Guide to Elliptical Curve Cryptography by
 	// Hankerson, et al.
 	k1, k2, signK1, signK2 := curve.splitK(curve.moduloReduce(k))
-	k1Len := len(k1)
-	k2Len := len(k2)
-	m := k1Len
-	if k2Len > m {
-		m = k2Len
-	}
 
 	// The main equation here to remember is
 	// k * P = k1 * P + k2 * ϕ(P)
 	// P1 below is P in the equation, P2 below is ϕ(P) in the equation
 	p1x, p1y := curve.bigAffineToField(Bx, By)
+	// For NAF, we need the negative point
+	p1yNeg := new(fieldVal).Set(p1y).Negate(1)
 	p1z := new(fieldVal).SetInt(1)
 	// Note ϕ(x,y) = (βx,y), the Jacobian z coordinate is 1, so this math
 	// goes through.
 	p2x := new(fieldVal).Set(p1x).Mul(curve.beta)
 	p2y := new(fieldVal).Set(p1y)
+	// For NAF, we need the negative point
+	p2yNeg := new(fieldVal).Set(p2y).Negate(1)
 	p2z := new(fieldVal).SetInt(1)
 
-	// If k1 or k2 are negative, we only need to flip the y of the respective
-	// Jacobian point. In ECC terms, we're reflecting the point over the
-	// x-axis which is guaranteed to still be on the curve.
+	// If k1 or k2 are negative, we flip the positive/negative values
 	if signK1 == -1 {
-		p1y.Negate(1)
+		p1y, p1yNeg = p1yNeg, p1y
 	}
 	if signK2 == -1 {
-		p2y.Negate(1)
+		p2y, p2yNeg = p2yNeg, p2y
 	}
 
-	// We use the left to right binary addition method.
-	// At each bit of k1 and k2, we add the current part of the
-	// k * P = k1 * P + k2 * ϕ(P) equation (that is, P1 and P2) and double.
-	// A further optimization using NAF is possible here but unimplemented.
-	var byteVal1, byteVal2 byte
+	// NAF versions of k1 and k2 should have a lot more zeros
+	k1NAF := NAF(k1)
+	k2NAF := NAF(k2)
+	k1Len := len(k1NAF)
+	k2Len := len(k2NAF)
+
+	m := k1Len
+	if m < k2Len {
+		m = k2Len
+	}
+
+	// We add left-to-right using the NAF optimization. This is using
+	// algorithm 3.77 from Guide to Elliptical Curve Cryptography.
+	// This should be faster overall since there will be a lot more instances
+	// of 0, hence reducing the number of Jacobian additions at the cost
+	// of 1 possible extra doubling.
+	var n1, n2 int
 	for i := 0; i < m; i++ {
-		// Note that if k1 or k2 has less than the max number of bytes, we
-		// want to ignore the bytes at the front since we're going left to
-		// right.
+		// Q = 2 * Q
+		curve.doubleJacobian(qx, qy, qz, qx, qy, qz)
+
+		// Since we're going left-to-right, we need to pad the front with 0's
 		if i < m-k1Len {
-			byteVal1 = 0
+			n1 = 0
 		} else {
-			byteVal1 = k1[i-m+k1Len]
+			n1 = k1NAF[i-m+k1Len]
+		}
+		if n1 == 1 {
+			curve.addJacobian(qx, qy, qz, p1x, p1y, p1z, qx, qy, qz)
+		} else if n1 == -1 {
+			curve.addJacobian(qx, qy, qz, p1x, p1yNeg, p1z, qx, qy, qz)
 		}
+		// Since we're going left-to-right, we need to pad the front with 0's
 		if i < m-k2Len {
-			byteVal2 = 0
+			n2 = 0
 		} else {
-			byteVal2 = k2[i-m+k2Len]
+			n2 = k2NAF[i-m+k2Len]
 		}
-		for bitNum := 0; bitNum < 8; bitNum++ {
-			// Q = 2*Q
-			curve.doubleJacobian(qx, qy, qz, qx, qy, qz)
-			if byteVal1&0x80 == 0x80 {
-				// Q = Q + P1
-				curve.addJacobian(qx, qy, qz, p1x, p1y, p1z, qx, qy, qz)
-			}
-			if byteVal2&0x80 == 0x80 {
-				// Q = Q + P2
-				curve.addJacobian(qx, qy, qz, p2x, p2y, p2z, qx, qy, qz)
-			}
-			byteVal1 <<= 1
-			byteVal2 <<= 1
+		if n2 == 1 {
+			curve.addJacobian(qx, qy, qz, p2x, p2y, p2z, qx, qy, qz)
+		} else if n2 == -1 {
+			curve.addJacobian(qx, qy, qz, p2x, p2yNeg, p2z, qx, qy, qz)
 		}
 	}
 
diff --git a/btcec_test.go b/btcec_test.go
index 054eeb0..bf7f61b 100644
--- a/btcec_test.go
+++ b/btcec_test.go
@@ -657,6 +657,36 @@ func TestSignAndVerify(t *testing.T) {
 	testSignAndVerify(t, btcec.S256(), "S256")
 }
 
+func TestNAF(t *testing.T) {
+	negOne := big.NewInt(-1)
+	one := big.NewInt(1)
+	two := big.NewInt(2)
+	for i := 0; i < 1024; i++ {
+		data := make([]byte, 32)
+		_, err := rand.Read(data)
+		if err != nil {
+			t.Fatalf("failed to read random data at %d", i)
+			break
+		}
+		naf := btcec.NAF(data)
+		want := new(big.Int).SetBytes(data)
+		got := big.NewInt(0)
+		// Rebuild the value from the NAF digits (Horner's rule, high digit first).
+		for _, cur := range naf {
+			got.Mul(got, two)
+			if cur == 1 {
+				got.Add(got, one)
+			} else if cur == -1 {
+				got.Add(got, negOne)
+			}
+		}
+		if got.Cmp(want) != 0 {
+			t.Errorf("%d: Failed NAF got %X want %X", i, got, want)
+		}
+	}
+
+}
+
 func fromHex(s string) *big.Int {
 	r, ok := new(big.Int).SetString(s, 16)
 	if !ok {