Skip to content

Commit

Permalink
strconv: implement Ryū-like algorithm for atof
Browse files Browse the repository at this point in the history
The Ryū algorithm as described in a paper by Ulf Adams,
"Ryū: Fast Float-to-String Conversion" (doi:10.1145/3192366.3192369)
is better than Grisu3 because it handles all edge cases properly.
In Grisu3, about 0.5% of float64 numbers fall back to the slow
algorithm with can be 10-200 times slower.

The core property used by the Ryū algorithm is that using
sufficiently large precision for powers of 10 can eliminate
all rounding edge cases. Such edge cases can be characterized
by an equation of shape:
    m * P <= n * 2^k <= m * (P+1)
where P is the fixed precision truncated version of the power of 10.
Solving this equation can be done using Farey sequences to enumerate
rationals n/m in the interval [P/2^k, (P+1)/2^k].

The original algorithm describes formatting to shortest decimal
representation. This patch implements a variant of this algorithm
for atof functions, using the properties:
- 64-bit powers of 10 are enough to handle 31-bit decimal mantissas
  to parse float32 values
- 128-bit powers of 10 are enough to handle 64-bit decimal mantissas
  to parse float64 values

Since Grisu3 already uses 64-bit powers of ten, the difference
in atof32 is hard to notice, but rather resides in much clearer
logic.

Powers of 10 are tabulated and will be reused for the ftoa
implementation.

AMD64 benchmarks:

benchmark                     old ns/op     new ns/op     delta
BenchmarkAtof64Decimal        38.6          38.5          -0.26%
BenchmarkAtof64Float          49.9          49.6          -0.60%
BenchmarkAtof64FloatExp       78.5          69.9          -10.96%
BenchmarkAtof64FloatExact     125           141           +12.80%
BenchmarkAtof64Big            148           161           +8.78%
BenchmarkAtof64Hard           9946          120           -98.79%
BenchmarkAtof64RandomBits     70.7          69.1          -2.26%
BenchmarkAtof64RandomFloats   70.4          70.5          +0.14%
BenchmarkAtof32Decimal        40.1          37.5          -6.48%
BenchmarkAtof32Float          48.4          45.3          -6.40%
BenchmarkAtof32FloatExp       87.1          74.1          -14.93%
BenchmarkAtof32FloatHard      951           104           -89.06%
BenchmarkAtof32Random         113           97.1          -14.07%

ARM benchmarks:

benchmark                     old ns/op     new ns/op     delta
BenchmarkAtof64Decimal        670           659           -1.64%
BenchmarkAtof64Float          2082          2050          -1.54%
BenchmarkAtof64FloatExp       1137          1044          -8.18%
BenchmarkAtof64FloatExact     1007          1623          +61.17%
BenchmarkAtof64Big            1179          1361          +15.44%
BenchmarkAtof64Hard           61099         1097          -98.20%
BenchmarkAtof64RandomBits     646           634           -1.86%
BenchmarkAtof64RandomFloats   639           627           -1.88%
BenchmarkAtof32Decimal        823           824           +0.12%
BenchmarkAtof32Float          2398          2364          -1.42%
BenchmarkAtof32FloatExp       1294          1195          -7.65%
BenchmarkAtof32FloatHard      6168          965           -84.35%
BenchmarkAtof32Random         1175          1100          -6.38%

Updates golang#15672

Change-Id: I297f2ffb038d7c4598e1365b61c13b30e9bdd7fc
  • Loading branch information
remyoudompheng committed Mar 29, 2019
1 parent 4a7cd9d commit 051ede9
Show file tree
Hide file tree
Showing 4 changed files with 1,026 additions and 8 deletions.
44 changes: 36 additions & 8 deletions src/strconv/atof.go
Original file line number Diff line number Diff line change
Expand Up @@ -566,11 +566,29 @@ func atof32(s string) (f float32, err error) {
return f, nil
}
}
// Try another fast path.
ext := new(extFloat)
if ok := ext.AssignDecimal(mantissa, exp, neg, trunc, &float32info); ok {
b, ovf := ext.floatBits(&float32info)
// Try Ryū-style method (float32).
ok := true
var b uint64
var ovf bool
if mantissa>>31 == 0 && !trunc {
// Ryu atof32 method.
b, ovf = ryuAtof32(uint32(mantissa), exp)
} else {
// Ryū atof64 method.
b, ovf = ryuAtof64(mantissa, exp, &float32info)
if trunc {
// check output for the rounded up mantissa.
b2, _ := ryuAtof64(mantissa+1, exp, &float32info)
if b != b2 {
ok = false
}
}
}
if ok {
f = math.Float32frombits(uint32(b))
if neg {
f = -f
}
if ovf {
err = rangeError(fnParseFloat, s)
}
Expand Down Expand Up @@ -608,11 +626,21 @@ func atof64(s string) (f float64, err error) {
return f, nil
}
}
// Try another fast path.
ext := new(extFloat)
if ok := ext.AssignDecimal(mantissa, exp, neg, trunc, &float64info); ok {
b, ovf := ext.floatBits(&float64info)
// Try Ryū-style method.
ok := true
b, ovf := ryuAtof64(mantissa, exp, &float64info)
if trunc {
// check output for the rounded up mantissa.
b2, _ := ryuAtof64(mantissa+1, exp, &float64info)
if b != b2 {
ok = false
}
}
if ok {
f = math.Float64frombits(b)
if neg {
f = -f
}
if ovf {
err = rangeError(fnParseFloat, s)
}
Expand Down
21 changes: 21 additions & 0 deletions src/strconv/atof_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,9 @@ var atof32tests = []atofTest{
{"0x0.0000008p-125", "0", nil}, // rounded down
{"0x0.0000007p-125", "0", nil}, // rounded down

// Not so many digits but hard to round correctly.
{"376122878645e-046", "3.761229e-35", nil},

// 2^92 = 8388608p+69 = 4951760157141521099596496896 (4.9517602e27)
// is an exact power of two that needs 8 decimal digits to be correctly
// parsed back.
Expand Down Expand Up @@ -624,12 +627,24 @@ func BenchmarkAtof64FloatExp(b *testing.B) {
}
}

func BenchmarkAtof64FloatExact(b *testing.B) {
for i := 0; i < b.N; i++ {
ParseFloat("1.1920928955078125", 64)
}
}

func BenchmarkAtof64Big(b *testing.B) {
for i := 0; i < b.N; i++ {
ParseFloat("123456789123456789123456789", 64)
}
}

func BenchmarkAtof64Hard(b *testing.B) {
for i := 0; i < b.N; i++ {
ParseFloat("4.334126125515466e-210", 64)
}
}

func BenchmarkAtof64RandomBits(b *testing.B) {
for i := 0; i < b.N; i++ {
ParseFloat(benchmarksRandomBits[i%1024], 64)
Expand Down Expand Up @@ -660,6 +675,12 @@ func BenchmarkAtof32FloatExp(b *testing.B) {
}
}

func BenchmarkAtof32FloatHard(b *testing.B) {
for i := 0; i < b.N; i++ {
ParseFloat("376122878645e-046", 32)
}
}

var float32strings [4096]string

func BenchmarkAtof32Random(b *testing.B) {
Expand Down
243 changes: 243 additions & 0 deletions src/strconv/atofryu.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package strconv

import (
"math/bits"
)

// decimal to binary conversion following the principles of the Ryū algorithm.
//
// See Ulf Adams, "Ryū: Fast Float-to-String Conversion" (doi:10.1145/3192366.3192369)
//
// The steps are:
// 1) Ensure the decimal input fits in 31/64 bits (for float32, float64)
// 2) Multiply decimal by the requested power 10^e, truncating it down.
// This can be done using a fixed precision representation of 10^e
// tabulated in pow10wide and invpow10wide tables.
// 3) Remove extra bits and apply rounding rules to obtain mantissa.

func ryuAtof32(mant uint32, exp int) (fbits uint64, ovf bool) {
// 32-bit decimal mantissas cannot be accepted because
// of the (only) edge case 3465080571e-48 where we would
// end up off by one.
bitlen := bits.Len32(mant)
if bitlen >= 32 {
panic("ryuAtof32 called with decimal mantissa larger than 31 bits")
}
// Normalize input to be 31-bit wide.
e2 := 0
if bitlen < 31 {
mant <<= uint(31 - bitlen)
e2 = bitlen - 31
}

// multiply by a power of 10. It is required to know
// whether the computation is exact.
//
// Using a 64-bit representation (P+ε)*2^E of 10^q, the following
// property can be proved:
// (mant × P) >> 69
// which is usually a 25-bit integer, is the correct lower
// truncation of (mant × (P+ε)) >> 69.
const atof32Shift = 69

var powmant uint64
var powexp int
switch {
case exp >= 39:
// float32 cannot be larger than 1e39
return 0xff << 23, true
case exp <= -65:
// float32 cannot be smaller than (1<<64) * 1e-65
return 0, false
case exp > 0:
powmant = pow10wide[exp][0]
powexp = pow10exp(exp) - 63
case exp == 0:
// no multiply
case exp < 0:
powmant = invpow10wide[-exp][0] + 1 // round up
powexp = pow10exp(exp) - 63
}
// Is it an exact computation?
// Only small positive powers of 10 are exact (5^28 has 66 bits).
exact := 27 >= exp && exp >= 0

var di uint64
if exp == 0 {
di = uint64(mant)
} else {
// Multiply and shift right by 69 bits.
hi, lo := bits.Mul64(uint64(mant), powmant)
di = hi >> (atof32Shift - 64)
d0 := hi&(1<<(atof32Shift-64)-1) == 0 && lo == 0
if !d0 {
exact = false
}
e2 += powexp + atof32Shift
}
// As a special case, computation might still be exact, if exponent
// was negative and if it amounts to computing an exact division.
// In that case, we ignore all lower bits.
// Note that division by 10^14 cannot be exact as 5^14 has 33 bits.
if exp < 0 && exp >= -13 && divisibleByPower5(uint64(mant), -exp) {
exact = true
}
return ryuFloatBits(di, e2, exact, &float32info)
}

func ryuAtof64(mant uint64, exp int, flt *floatInfo) (fbits uint64, ovf bool) {
bitlen := bits.Len64(mant)
e2 := 0
if bitlen < 64 {
mant <<= uint(64 - bitlen)
e2 = bitlen - 64
}

// multiply by a power of 10. It is required to know
// whether the computation is exact.
//
// Using a 128-bit representation (P+ε)*2^E of 10^q, the following
// property can be proved:
// (mant × P) >> 137
// which is usually a 55-bit integer, is the correct lower
// truncation of (mant × (P+ε)) >> 137.
const atof64Shift = 137

var pow [2]uint64 // a representation of 10^q
var powexp int
switch {
case exp >= 309:
// float64 cannot be larger than 1e309
return 0x7ff << 52, true
case exp < -342:
// Even (1<<64-1) * 1e-343 converts to zero.
return 0, false
case exp > 0:
pow = pow10wide[exp]
powexp = pow10exp(exp) - 127
case exp == 0:
// no multiply
case exp < 0:
pow = invpow10wide[-exp]
powexp = pow10exp(exp) - 127
}
// Is it an exact computation?
// Only small positive powers of 10 are exact (5^55 has 128 bits).
exact := exp <= 55 && exp >= 0

// Compute the truncated down value of (x*10^q).
var di uint64
if exp == 0 {
di = mant
} else {
// Compute (mant * pow) >> atof64Shift
l1, l0 := bits.Mul64(mant, pow[1])
h1, h0 := bits.Mul64(mant, pow[0])
mid, carry := bits.Add64(l1, h0, 0)

di = h1 + carry
d0 := (di&(1<<(atof64Shift-128)-1) == 0) &&
mid == 0 && l0 == 0
if !d0 {
exact = false
}
di >>= (atof64Shift - 128)
e2 += powexp + atof64Shift
}
// As a special case, computation might still be exact, if exponent
// was negative and if it amounts to computing an exact division.
// In that case, we ignore all lower bits.
// Note that division by 10^28 cannot be exact as 5^28 has 66 bits.
if exp < 0 && exp >= -27 && divisibleByPower5(mant, -exp) {
exact = true
}
return ryuFloatBits(di, e2, exact, flt)
}

// ryuFloatBits returns the correctly rounded floating point
// representation of a number x = (di + ε) * 2^e2,
// where ε is an unknown number < 1, which is known to be zero
// if 'exact' is true.
func ryuFloatBits(di uint64, e2 int, exact bool, flt *floatInfo) (fbits uint64, ovf bool) {
mantbits := flt.mantbits
bias := flt.bias
expbits := flt.expbits
// If the input mantissa is too large, truncate it.
blen := bits.Len64(di)
e2 += blen - 1
extra := uint(blen) - mantbits - 1 // number of lower bits to remove
if e2 < bias+1 {
extra += uint(bias + 1 - e2)
e2 = bias + 1
}
if extra > uint(blen) {
return 0.0, false
}
// Compute correct rounding.
extramask := uint64(1<<extra - 1)
di, dfrac := di>>extra, di&extramask
roundUp := false
if exact {
// If we computed an exact product, d + 1/2
// should round to d+1 if 'd' is odd.
roundUp = dfrac > 1<<(extra-1) ||
(dfrac == 1<<(extra-1) && !exact) ||
(dfrac == 1<<(extra-1) && exact && di&1 == 1)
} else {
// otherwise, d+1/2 always rounds up because
// we truncated below.
roundUp = dfrac>>(extra-1) == 1
}
if dfrac != 0 {
exact = false
}
if roundUp {
di++
}

// Rounding might have added a bit; shift down.
if di == 2<<mantbits {
di >>= 1
e2++
}

// Infinities.
if e2-bias >= 1<<expbits-1 {
// ±Inf
di = 0
e2 = 1<<expbits - 1 + bias
ovf = true
} else if di&(1<<mantbits) == 0 {
// Denormalized?
e2 = bias
}
// Assemble bits.
fbits = di & (1<<mantbits - 1)
fbits |= (uint64(e2-bias) & (1<<expbits - 1)) << mantbits
return fbits, ovf
}

func divisibleByPower5(m uint64, k int) bool {
for i := 0; i < k; i++ {
a, b := divmod5(m)
if b != 0 {
return false
}
m = a
}
return true
}

func divmod5(x uint64) (uint64, uint64) {
if !host32bit {
return x / 5, x % 5
}
// Avoid runtime long division.
hi, _ := bits.Mul64(x, 0xcccccccccccccccd)
q := hi >> 2
return q, x - q*5
}
Loading

0 comments on commit 051ede9

Please sign in to comment.