From b82def66a9247c2bcb7d86d5ad918623dcdf9739 Mon Sep 17 00:00:00 2001
From: Martin Holst Swende <martin@swende.se>
Date: Fri, 24 Mar 2023 16:13:53 -0400
Subject: [PATCH] implement `Float64()` conversion (#132)

Float64 returns the float64 value nearest to z.

    func (z *Int) Float64() float64

goos: linux
goarch: amd64
pkg: github.com/holiman/uint256
cpu: 12th Gen Intel(R) Core(TM) i7-1270P
BenchmarkFloat64/Float64/uint256-8                246834              4681 ns/op               0 B/op          0 allocs/op
BenchmarkFloat64/Float64/uint256-8                564375              4651 ns/op               0 B/op          0 allocs/op
BenchmarkFloat64/Float64/uint256-8                256735              4545 ns/op               0 B/op          0 allocs/op
BenchmarkFloat64/Float64/uint256-8                567476              2117 ns/op               0 B/op          0 allocs/op
BenchmarkFloat64/Float64/uint256-8                562767              2114 ns/op               0 B/op          0 allocs/op
BenchmarkFloat64/Float64/big-8                     23724             55516 ns/op           23424 B/op        510 allocs/op
BenchmarkFloat64/Float64/big-8                     18283             64730 ns/op           23424 B/op        510 allocs/op
BenchmarkFloat64/Float64/big-8                     18610             60523 ns/op           23424 B/op        510 allocs/op
BenchmarkFloat64/Float64/big-8                     19528             62700 ns/op           23424 B/op        510 allocs/op
BenchmarkFloat64/Float64/big-8                     19828             56352 ns/op           23424 B/op        510 allocs/op
---
 conversion.go      | 37 ++++++++++++++++++++++++++++++++++
 conversion_test.go | 49 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/conversion.go b/conversion.go
index f169f7d8..559451e2 100644
--- a/conversion.go
+++ b/conversion.go
@@ -13,6 +13,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"math"
 	"math/big"
 	"math/bits"
 	"strings"
@@ -76,6 +77,42 @@ func MustFromBig(b *big.Int) *Int {
 	return z
 }
 
+// Float64 returns the float64 value nearest to z. Values that cannot be
+// represented exactly are rounded to nearest, with ties going to even.
+//
+// Note: The `big.Float` version of `Float64` also returns an 'Accuracy'; this
+// method does not. A uint256 can never be out of range for a float64
+// (|x| > math.MaxFloat64), so the only possible inaccuracy is the rounding of
+// values that need more than 53 significant bits.
+func (z *Int) Float64() float64 {
+	if z.IsUint64() {
+		return float64(z.Uint64())
+	}
+	// See [1] for a detailed walkthrough of IEEE 754 conversion
+	//
+	// 1: https://www.wikihow.com/Convert-a-Number-from-Decimal-to-IEEE-754-Floating-Point-Representation
+
+	bitlen := uint64(z.BitLen())
+
+	// Normalize the number, by shifting it so that the MSB is shifted out.
+	// The remaining bits are the fraction, left-aligned in y.
+	y := new(Int).Lsh(z, uint(1+256-bitlen))
+	// The IEEE 754 double-precision layout is: 1 sign bit (always zero for
+	// uints), 11 exponent bits (biased by 1023) and 52 fraction bits.
+	biasedExp := 1023 + bitlen - 1
+	result := biasedExp<<52 | y[3]>>12
+
+	// Only the top 52 fraction bits fit; round to nearest on the rest. The low
+	// 12 bits of y[3] are the first discarded bits (0x800 is exactly half an
+	// ULP); any set bit in y[2], y[1] or y[0] lifts the value above that half.
+	rem, sticky := y[3]&0xfff, y[2]|y[1]|y[0]
+	if rem > 0x800 || (rem == 0x800 && (sticky != 0 || result&1 == 1)) {
+		// A carry out of the fraction correctly increments the exponent.
+		result++
+	}
+	return math.Float64frombits(result)
+}
+
 // SetFromHex sets z from the given string, interpreted as a hexadecimal number.
 // OBS! This method is _not_ strictly identical to the (*big.Int).SetString(..., 16) method.
 // Notable differences:
diff --git a/conversion_test.go b/conversion_test.go
index 132ad38f..46dc5c8f 100644
--- a/conversion_test.go
+++ b/conversion_test.go
@@ -1476,3 +1476,52 @@ func BenchmarkDecimal(b *testing.B) {
 		}
 	})
 }
+
+func testFloat64(t *testing.T, z *Int) { // checks z.Float64() against big.Float's conversion of z
+	bigF, _ := new(big.Float).SetInt(z.ToBig()).Float64()
+	_ = z.Float64() // Float64 must not modify z: the reference above was taken before this call
+	if have, want := z.Float64(), bigF; have != want {
+		t.Errorf("%s: have %f want %f", z.Hex(), have, want)
+	}
+}
+
+// TestFloat64 checks Float64 against big.Float for every power of two 2^0..2^255.
+func TestFloat64(t *testing.T) {
+	for i := uint(0); i < 256; i++ {
+		testFloat64(t, new(Int).Lsh(NewInt(1), i))
+	}
+}
+
+func FuzzFloat64(f *testing.F) { // fuzzes Float64 via testFloat64 on Ints built from four fuzzed words
+	f.Fuzz(func(t *testing.T, aa, bb, cc, dd uint64) {
+		testFloat64(t, &Int{aa, bb, cc, dd}) // aa..dd become elements 0..3 of the Int
+	})
+}
+
+// BenchmarkFloat64 compares Float64 on *Int and via big.Float for 2^0..2^254.
+func BenchmarkFloat64(b *testing.B) {
+	var (
+		ours   []*Int
+		theirs []*big.Int
+	)
+	for shift := uint(0); shift < 255; shift++ {
+		v := new(Int).Lsh(NewInt(1), shift)
+		ours, theirs = append(ours, v), append(theirs, v.ToBig())
+	}
+	b.Run("Float64/uint256", func(b *testing.B) {
+		b.ReportAllocs()
+		for n := 0; n < b.N; n++ {
+			for _, v := range ours {
+				_ = v.Float64()
+			}
+		}
+	})
+	b.Run("Float64/big", func(b *testing.B) {
+		b.ReportAllocs()
+		for n := 0; n < b.N; n++ {
+			for _, v := range theirs {
+				_, _ = new(big.Float).SetInt(v).Float64()
+			}
+		}
+	})
+}