strconv: implement Ryū-like algorithm for atof

The Ryū algorithm as described in a paper by Ulf Adams, "Ryū: Fast Float-to-String Conversion" (doi:10.1145/3192366.3192369) is better than Grisu3 because it handles all edge cases properly. In Grisu3, about 0.5% of float64 numbers fall back to the slow algorithm with can be 10-200 times slower. The core property used by the Ryū algorithm is that using sufficiently large precision for powers of 10 can eliminate all rounding edge cases. Such edge cases can be characterized by an equation of shape: m * P <= n * 2^k <= m * (P+1) where P is the fixed precision truncated version of the power of 10. Solving this equation can be done using Farey sequences to enumerate rationals n/m in the interval [P/2^k, (P+1)/2^k]. The original algorithm describes formatting to shortest decimal representation. This patch implements a variant of this algorithm for atof functions, using the properties: - 64-bit powers of 10 are enough to handle 31-bit decimal mantissas to parse float32 values - 128-bit powers of 10 are enough to handle 64-bit decimal mantissas to parse float64 values Since Grisu3 already uses 64-bit powers of ten, the difference in atof32 is hard to notice, but rather resides in much clearer logic. Powers of 10 are tabulated and will be reused for the ftoa implementation. AMD64 benchmarks: benchmark old ns/op new ns/op delta BenchmarkAtof64Decimal 38.6 38.5 -0.26% BenchmarkAtof64Float 49.9 49.6 -0.60% BenchmarkAtof64FloatExp 78.5 69.9 -10.96% BenchmarkAtof64FloatExact 125 141 +12.80% BenchmarkAtof64Big 148 161 +8.78% BenchmarkAtof64Hard 9946 120 -98.79% BenchmarkAtof64RandomBits 70.7 69.1 -2.26% BenchmarkAtof64RandomFloats 70.4 70.5 +0.14% BenchmarkAtof32Decimal 40.1 37.5 -6.48% BenchmarkAtof32Float 48.4 45.3 -6.40% BenchmarkAtof32FloatExp 87.1 74.1 -14.93% BenchmarkAtof32FloatHard 951 104 -89.06% BenchmarkAtof32Random 113 97.1 -14.07% ARM benchmarks: benchmark old ns/op new ns/op delta BenchmarkAtof64Decimal 670 659 -1.64% BenchmarkAtof64Float 2082 2050 -1.54% BenchmarkAtof64FloatExp 1137 1044 -8.18% BenchmarkAtof64FloatExact 1007 1623 +61.17% BenchmarkAtof64Big 1179 1361 +15.44% BenchmarkAtof64Hard 61099 1097 -98.20% BenchmarkAtof64RandomBits 646 634 -1.86% BenchmarkAtof64RandomFloats 639 627 -1.88% BenchmarkAtof32Decimal 823 824 +0.12% BenchmarkAtof32Float 2398 2364 -1.42% BenchmarkAtof32FloatExp 1294 1195 -7.65% BenchmarkAtof32FloatHard 6168 965 -84.35% BenchmarkAtof32Random 1175 1100 -6.38% Updates golang#15672 Change-Id: I297f2ffb038d7c4598e1365b61c13b30e9bdd7fc
remyoudompheng · Mar 29, 2019 · 051ede9 · 051ede9
1 parent 4a7cd9d
commit 051ede9
Show file tree

Hide file tree

Showing 4 changed files with 1,026 additions and 8 deletions.
diff --git a/src/strconv/atof.go b/src/strconv/atof.go
@@ -566,11 +566,29 @@ func atof32(s string) (f float32, err error) {
 				return f, nil
 			}
 		}
-		// Try another fast path.
-		ext := new(extFloat)
-		if ok := ext.AssignDecimal(mantissa, exp, neg, trunc, &float32info); ok {
-			b, ovf := ext.floatBits(&float32info)
+		// Try Ryū-style method (float32).
+		ok := true
+		var b uint64
+		var ovf bool
+		if mantissa>>31 == 0 && !trunc {
+			// Ryu atof32 method.
+			b, ovf = ryuAtof32(uint32(mantissa), exp)
+		} else {
+			// Ryū atof64 method.
+			b, ovf = ryuAtof64(mantissa, exp, &float32info)
+			if trunc {
+				// check output for the rounded up mantissa.
+				b2, _ := ryuAtof64(mantissa+1, exp, &float32info)
+				if b != b2 {
+					ok = false
+				}
+			}
+		}
+		if ok {
 			f = math.Float32frombits(uint32(b))
+			if neg {
+				f = -f
+			}
 			if ovf {
 				err = rangeError(fnParseFloat, s)
 			}
@@ -608,11 +626,21 @@ func atof64(s string) (f float64, err error) {
 				return f, nil
 			}
 		}
-		// Try another fast path.
-		ext := new(extFloat)
-		if ok := ext.AssignDecimal(mantissa, exp, neg, trunc, &float64info); ok {
-			b, ovf := ext.floatBits(&float64info)
+		// Try Ryū-style method.
+		ok := true
+		b, ovf := ryuAtof64(mantissa, exp, &float64info)
+		if trunc {
+			// check output for the rounded up mantissa.
+			b2, _ := ryuAtof64(mantissa+1, exp, &float64info)
+			if b != b2 {
+				ok = false
+			}
+		}
+		if ok {
 			f = math.Float64frombits(b)
+			if neg {
+				f = -f
+			}
 			if ovf {
 				err = rangeError(fnParseFloat, s)
 			}

diff --git a/src/strconv/atof_test.go b/src/strconv/atof_test.go
@@ -412,6 +412,9 @@ var atof32tests = []atofTest{
 	{"0x0.0000008p-125", "0", nil},             // rounded down
 	{"0x0.0000007p-125", "0", nil},             // rounded down
 
+	// Not so many digits but hard to round correctly.
+	{"376122878645e-046", "3.761229e-35", nil},
+
 	// 2^92 = 8388608p+69 = 4951760157141521099596496896 (4.9517602e27)
 	// is an exact power of two that needs 8 decimal digits to be correctly
 	// parsed back.
@@ -624,12 +627,24 @@ func BenchmarkAtof64FloatExp(b *testing.B) {
 	}
 }
 
+func BenchmarkAtof64FloatExact(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		ParseFloat("1.1920928955078125", 64)
+	}
+}
+
 func BenchmarkAtof64Big(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		ParseFloat("123456789123456789123456789", 64)
 	}
 }
 
+func BenchmarkAtof64Hard(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		ParseFloat("4.334126125515466e-210", 64)
+	}
+}
+
 func BenchmarkAtof64RandomBits(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		ParseFloat(benchmarksRandomBits[i%1024], 64)
@@ -660,6 +675,12 @@ func BenchmarkAtof32FloatExp(b *testing.B) {
 	}
 }
 
+func BenchmarkAtof32FloatHard(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		ParseFloat("376122878645e-046", 32)
+	}
+}
+
 var float32strings [4096]string
 
 func BenchmarkAtof32Random(b *testing.B) {

diff --git a/src/strconv/atofryu.go b/src/strconv/atofryu.go
@@ -0,0 +1,243 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strconv
+
+import (
+	"math/bits"
+)
+
+// decimal to binary conversion following the principles of the Ryū algorithm.
+//
+// See Ulf Adams, "Ryū: Fast Float-to-String Conversion" (doi:10.1145/3192366.3192369)
+//
+// The steps are:
+// 1) Ensure the decimal input fits in 31/64 bits (for float32, float64)
+// 2) Multiply decimal by the requested power 10^e, truncating it down.
+//    This can be done using a fixed precision representation of 10^e
+//    tabulated in pow10wide and invpow10wide tables.
+// 3) Remove extra bits and apply rounding rules to obtain mantissa.
+
+func ryuAtof32(mant uint32, exp int) (fbits uint64, ovf bool) {
+	// 32-bit decimal mantissas cannot be accepted because
+	// of the (only) edge case 3465080571e-48 where we would
+	// end up off by one.
+	bitlen := bits.Len32(mant)
+	if bitlen >= 32 {
+		panic("ryuAtof32 called with decimal mantissa larger than 31 bits")
+	}
+	// Normalize input to be 31-bit wide.
+	e2 := 0
+	if bitlen < 31 {
+		mant <<= uint(31 - bitlen)
+		e2 = bitlen - 31
+	}
+
+	// multiply by a power of 10. It is required to know
+	// whether the computation is exact.
+	//
+	// Using a 64-bit representation (P+ε)*2^E of 10^q, the following
+	// property can be proved:
+	//     (mant × P) >> 69
+	// which is usually a 25-bit integer, is the correct lower
+	// truncation of (mant × (P+ε)) >> 69.
+	const atof32Shift = 69
+
+	var powmant uint64
+	var powexp int
+	switch {
+	case exp >= 39:
+		// float32 cannot be larger than 1e39
+		return 0xff << 23, true
+	case exp <= -65:
+		// float32 cannot be smaller than (1<<64) * 1e-65
+		return 0, false
+	case exp > 0:
+		powmant = pow10wide[exp][0]
+		powexp = pow10exp(exp) - 63
+	case exp == 0:
+		// no multiply
+	case exp < 0:
+		powmant = invpow10wide[-exp][0] + 1 // round up
+		powexp = pow10exp(exp) - 63
+	}
+	// Is it an exact computation?
+	// Only small positive powers of 10 are exact (5^28 has 66 bits).
+	exact := 27 >= exp && exp >= 0
+
+	var di uint64
+	if exp == 0 {
+		di = uint64(mant)
+	} else {
+		// Multiply and shift right by 69 bits.
+		hi, lo := bits.Mul64(uint64(mant), powmant)
+		di = hi >> (atof32Shift - 64)
+		d0 := hi&(1<<(atof32Shift-64)-1) == 0 && lo == 0
+		if !d0 {
+			exact = false
+		}
+		e2 += powexp + atof32Shift
+	}
+	// As a special case, computation might still be exact, if exponent
+	// was negative and if it amounts to computing an exact division.
+	// In that case, we ignore all lower bits.
+	// Note that division by 10^14 cannot be exact as 5^14 has 33 bits.
+	if exp < 0 && exp >= -13 && divisibleByPower5(uint64(mant), -exp) {
+		exact = true
+	}
+	return ryuFloatBits(di, e2, exact, &float32info)
+}
+
+func ryuAtof64(mant uint64, exp int, flt *floatInfo) (fbits uint64, ovf bool) {
+	bitlen := bits.Len64(mant)
+	e2 := 0
+	if bitlen < 64 {
+		mant <<= uint(64 - bitlen)
+		e2 = bitlen - 64
+	}
+
+	// multiply by a power of 10. It is required to know
+	// whether the computation is exact.
+	//
+	// Using a 128-bit representation (P+ε)*2^E of 10^q, the following
+	// property can be proved:
+	//     (mant × P) >> 137
+	// which is usually a 55-bit integer, is the correct lower
+	// truncation of (mant × (P+ε)) >> 137.
+	const atof64Shift = 137
+
+	var pow [2]uint64 // a representation of 10^q
+	var powexp int
+	switch {
+	case exp >= 309:
+		// float64 cannot be larger than 1e309
+		return 0x7ff << 52, true
+	case exp < -342:
+		// Even (1<<64-1) * 1e-343 converts to zero.
+		return 0, false
+	case exp > 0:
+		pow = pow10wide[exp]
+		powexp = pow10exp(exp) - 127
+	case exp == 0:
+		// no multiply
+	case exp < 0:
+		pow = invpow10wide[-exp]
+		powexp = pow10exp(exp) - 127
+	}
+	// Is it an exact computation?
+	// Only small positive powers of 10 are exact (5^55 has 128 bits).
+	exact := exp <= 55 && exp >= 0
+
+	// Compute the truncated down value of (x*10^q).
+	var di uint64
+	if exp == 0 {
+		di = mant
+	} else {
+		// Compute (mant * pow) >> atof64Shift
+		l1, l0 := bits.Mul64(mant, pow[1])
+		h1, h0 := bits.Mul64(mant, pow[0])
+		mid, carry := bits.Add64(l1, h0, 0)
+
+		di = h1 + carry
+		d0 := (di&(1<<(atof64Shift-128)-1) == 0) &&
+			mid == 0 && l0 == 0
+		if !d0 {
+			exact = false
+		}
+		di >>= (atof64Shift - 128)
+		e2 += powexp + atof64Shift
+	}
+	// As a special case, computation might still be exact, if exponent
+	// was negative and if it amounts to computing an exact division.
+	// In that case, we ignore all lower bits.
+	// Note that division by 10^28 cannot be exact as 5^28 has 66 bits.
+	if exp < 0 && exp >= -27 && divisibleByPower5(mant, -exp) {
+		exact = true
+	}
+	return ryuFloatBits(di, e2, exact, flt)
+}
+
+// ryuFloatBits returns the correctly rounded floating point
+// representation of a number x = (di + ε) * 2^e2,
+// where ε is an unknown number < 1, which is known to be zero
+// if 'exact' is true.
+func ryuFloatBits(di uint64, e2 int, exact bool, flt *floatInfo) (fbits uint64, ovf bool) {
+	mantbits := flt.mantbits
+	bias := flt.bias
+	expbits := flt.expbits
+	// If the input mantissa is too large, truncate it.
+	blen := bits.Len64(di)
+	e2 += blen - 1
+	extra := uint(blen) - mantbits - 1 // number of lower bits to remove
+	if e2 < bias+1 {
+		extra += uint(bias + 1 - e2)
+		e2 = bias + 1
+	}
+	if extra > uint(blen) {
+		return 0.0, false
+	}
+	// Compute correct rounding.
+	extramask := uint64(1<<extra - 1)
+	di, dfrac := di>>extra, di&extramask
+	roundUp := false
+	if exact {
+		// If we computed an exact product, d + 1/2
+		// should round to d+1 if 'd' is odd.
+		roundUp = dfrac > 1<<(extra-1) ||
+			(dfrac == 1<<(extra-1) && !exact) ||
+			(dfrac == 1<<(extra-1) && exact && di&1 == 1)
+	} else {
+		// otherwise, d+1/2 always rounds up because
+		// we truncated below.
+		roundUp = dfrac>>(extra-1) == 1
+	}
+	if dfrac != 0 {
+		exact = false
+	}
+	if roundUp {
+		di++
+	}
+
+	// Rounding might have added a bit; shift down.
+	if di == 2<<mantbits {
+		di >>= 1
+		e2++
+	}
+
+	// Infinities.
+	if e2-bias >= 1<<expbits-1 {
+		// ±Inf
+		di = 0
+		e2 = 1<<expbits - 1 + bias
+		ovf = true
+	} else if di&(1<<mantbits) == 0 {
+		// Denormalized?
+		e2 = bias
+	}
+	// Assemble bits.
+	fbits = di & (1<<mantbits - 1)
+	fbits |= (uint64(e2-bias) & (1<<expbits - 1)) << mantbits
+	return fbits, ovf
+}
+
+func divisibleByPower5(m uint64, k int) bool {
+	for i := 0; i < k; i++ {
+		a, b := divmod5(m)
+		if b != 0 {
+			return false
+		}
+		m = a
+	}
+	return true
+}
+
+func divmod5(x uint64) (uint64, uint64) {
+	if !host32bit {
+		return x / 5, x % 5
+	}
+	// Avoid runtime long division.
+	hi, _ := bits.Mul64(x, 0xcccccccccccccccd)
+	q := hi >> 2
+	return q, x - q*5
+}