From b82def66a9247c2bcb7d86d5ad918623dcdf9739 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Fri, 24 Mar 2023 16:13:53 -0400 Subject: [PATCH] implement `Float64()` conversion (#132) Float64 returns the float64 value nearest to x. func (z *Int) Float64() float64 goos: linux goarch: amd64 pkg: github.com/holiman/uint256 cpu: 12th Gen Intel(R) Core(TM) i7-1270P BenchmarkFloat64/Float64/uint256-8 246834 4681 ns/op 0 B/op 0 allocs/op BenchmarkFloat64/Float64/uint256-8 564375 4651 ns/op 0 B/op 0 allocs/op BenchmarkFloat64/Float64/uint256-8 256735 4545 ns/op 0 B/op 0 allocs/op BenchmarkFloat64/Float64/uint256-8 567476 2117 ns/op 0 B/op 0 allocs/op BenchmarkFloat64/Float64/uint256-8 562767 2114 ns/op 0 B/op 0 allocs/op BenchmarkFloat64/Float64/big-8 23724 55516 ns/op 23424 B/op 510 allocs/op BenchmarkFloat64/Float64/big-8 18283 64730 ns/op 23424 B/op 510 allocs/op BenchmarkFloat64/Float64/big-8 18610 60523 ns/op 23424 B/op 510 allocs/op BenchmarkFloat64/Float64/big-8 19528 62700 ns/op 23424 B/op 510 allocs/op BenchmarkFloat64/Float64/big-8 19828 56352 ns/op 23424 B/op 510 allocs/op --- conversion.go | 37 ++++++++++++++++++++++++++++++++++ conversion_test.go | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/conversion.go b/conversion.go index f169f7d8..559451e2 100644 --- a/conversion.go +++ b/conversion.go @@ -13,6 +13,7 @@ import ( "errors" "fmt" "io" + "math" "math/big" "math/bits" "strings" @@ -76,6 +77,42 @@ func MustFromBig(b *big.Int) *Int { return z } +// Float64 returns the float64 value nearest to x. +// +// Note: The `big.Float` version of `Float64` also returns an 'Accuracy', indicating +// whether the value was too small or too large to be represented by a +// `float64`. However, the `uint256` type is unable to represent values +// out of scope (|x| < math.SmallestNonzeroFloat64 or |x| > math.MaxFloat64), +// therefore this method does not return any accuracy. 
+func (z *Int) Float64() float64 {
+	if z.IsUint64() {
+		return float64(z.Uint64())
+	}
+	// See [1] for a detailed walkthrough of IEEE 754 conversion
+	//
+	// 1: https://www.wikihow.com/Convert-a-Number-from-Decimal-to-IEEE-754-Floating-Point-Representation
+
+	// Normalize the number, by shifting it so that the MSB is shifted out.
+	bitlen := uint64(z.BitLen())
+	y := new(Int).Lsh(z, uint(1+256-bitlen))
+	// The top 52 bits of what is left form the fraction.
+	fraction := y[3] >> 12
+
+	// Everything below is rounded to nearest, ties to even: round up when the
+	// dropped bits exceed half an ulp, or equal it while the fraction is odd.
+	half := y[3]&0x800 != 0
+	sticky := y[3]&0x7ff | y[2] | y[1] | y[0]
+	if half && (sticky != 0 || fraction&1 != 0) {
+		fraction++
+	}
+
+	// The exponent is the position of the MSB, adjusted with the bias (1023).
+	biasedExp := 1023 + bitlen - 1
+	// Layout: 1 sign bit (zero for uints), 11 exponent bits, 52 fraction bits.
+	// Adding lets a fraction rounded up to 1<<52 carry into the exponent.
+	return math.Float64frombits(biasedExp<<52 + fraction)
+}
+
 // SetFromHex sets z from the given string, interpreted as a hexadecimal number.
 // OBS! This method is _not_ strictly identical to the (*big.Int).SetString(..., 16) method.
// Notable differences:
diff --git a/conversion_test.go b/conversion_test.go
index 132ad38f..46dc5c8f 100644
--- a/conversion_test.go
+++ b/conversion_test.go
@@ -1476,3 +1476,62 @@ func BenchmarkDecimal(b *testing.B) {
 		}
 	})
 }
+
+// testFloat64 checks that z.Float64() agrees with the big.Float conversion of
+// the same value, and that the conversion leaves z unmodified.
+func testFloat64(t *testing.T, z *Int) {
+	t.Helper()
+	orig := *z
+	want, _ := new(big.Float).SetInt(z.ToBig()).Float64()
+	if have := z.Float64(); have != want {
+		t.Errorf("%s: have %f want %f", orig.Hex(), have, want)
+	}
+	if *z != orig { // Op must not modify the z
+		t.Errorf("Float64 modified z: have %s want %s", z.Hex(), orig.Hex())
+	}
+}
+
+// TestFloat64 runs the Float64 check on every power of two from 1<<0 to 1<<255.
+func TestFloat64(t *testing.T) {
+	for i := uint(0); i < 256; i++ {
+		z := NewInt(1)
+		testFloat64(t, z.Lsh(z, i))
+	}
+}
+
+// FuzzFloat64 runs the Float64 check on Ints assembled from four fuzzed words.
+func FuzzFloat64(f *testing.F) {
+	f.Fuzz(func(t *testing.T, aa, bb, cc, dd uint64) {
+		testFloat64(t, &Int{aa, bb, cc, dd})
+	})
+}
+
+// BenchmarkFloat64 measures Float64 on uint256 values and, for comparison, the
+// big.Int -> big.Float -> float64 route, over the powers of two 1<<0 .. 1<<254.
+func BenchmarkFloat64(b *testing.B) {
+	var u256Ints []*Int
+	var bigints []*big.Int
+
+	for i := uint(0); i < 255; i++ {
+		a := NewInt(1)
+		a.Lsh(a, i)
+		u256Ints = append(u256Ints, a)
+		bigints = append(bigints, a.ToBig())
+	}
+	b.Run("Float64/uint256", func(b *testing.B) {
+		b.ReportAllocs()
+		for i := 0; i < b.N; i++ {
+			for _, z := range u256Ints {
+				_ = z.Float64()
+			}
+		}
+	})
+	b.Run("Float64/big", func(b *testing.B) {
+		b.ReportAllocs()
+		for i := 0; i < b.N; i++ {
+			for _, z := range bigints {
+				_, _ = new(big.Float).SetInt(z).Float64()
+			}
+		}
+	})
+}