From e0c1932523fdddcbf1b610442eb6f6f51f94c189 Mon Sep 17 00:00:00 2001
From: Nathan Daly <NHDaly@gmail.com>
Date: Mon, 12 Aug 2024 13:49:34 -0400
Subject: [PATCH] Use Int256 to reduce BigInts in FD operations. (#93)

* Use Int256 to avoid BigInt in FD operations.

This PR does not explicitly add support for FD{BitIntegers.Int256},
though that should work out of the box both before and after this PR.

Rather, this PR _uses_ a (U)Int256 under the hood to prevent allocations
from Int128 widening to BigInt in FD operations.

* Further reduce BigInts by skipping a `rem()` in iseven

* Bump patch version number
---
 Project.toml              |  4 +++-
 src/FixedPointDecimals.jl | 35 +++++++++++++++++++++++++++--------
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/Project.toml b/Project.toml
index 3f313f9..f120b6c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,13 +1,15 @@
 name = "FixedPointDecimals"
 uuid = "fb4d412d-6eee-574d-9565-ede6634db7b0"
 authors = ["Fengyang Wang <fengyang.wang.0@gmail.com>", "Curtis Vogt <curtis.vogt@gmail.com>"]
-version = "0.5.2"
+version = "0.5.3"
 
 [deps]
+BitIntegers = "c3b6d118-76ef-56ca-8cc7-ebb389d030a1"
 Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
 
 [compat]
 Parsers = "2.7"
+BitIntegers = "0.3.1"
 julia = "1.6"
 
 [extras]
diff --git a/src/FixedPointDecimals.jl b/src/FixedPointDecimals.jl
index 9b1a747..28b8611 100644
--- a/src/FixedPointDecimals.jl
+++ b/src/FixedPointDecimals.jl
@@ -35,6 +35,8 @@ export checked_abs, checked_add, checked_cld, checked_div, checked_fld,
     checked_mod, checked_mul, checked_neg, checked_rem, checked_sub
 
 using Base: decompose, BitInteger
+
+import BitIntegers  # For 128-bit _widemul / _widen
 import Parsers
 
 # floats that support fma and are roughly IEEE-like
@@ -118,6 +120,21 @@ function __init__()
     return
 end
 
+# Custom widemul implementation to avoid the cost of widening to BigInt.
+# FD{Int128} operations should widen to 256 bits internally, rather than to a BigInt.
+const BitInteger128 = Union{Int128, UInt128}
+_widemul(x, y) = _widen(x) * _widen(y)
+_widemul(x::Signed,y::Unsigned) = _widen(x) * signed(_widen(y))
+_widemul(x::Unsigned,y::Signed) = signed(_widen(x)) * _widen(y)
+
+# Custom widen: maps Int128/UInt128 to BitIntegers' 256-bit types instead of BigInt,
+# and falls back to Base's `widen` for every other type.
+_widen(::Type{Int128}) = BitIntegers.Int256
+_widen(::Type{UInt128}) = BitIntegers.UInt256
+_widen(t::Type) = widen(t)
+_widen(x::T) where {T} = (_widen(T))(x)
+
+
 (::Type{T})(x::Real) where {T <: FD} = convert(T, x)
 
 floattype(::Type{<:FD{T}}) where {T<:Union{Int8, UInt8, Int16, UInt16}} = Float32
@@ -157,7 +174,9 @@ function _round_to_nearest(quotient::T,
                            divisor::T,
                            ::RoundingMode{M}=RoundNearest) where {T <: Integer, M}
     halfdivisor = divisor >> 1
-    if iseven(divisor) && remainder == halfdivisor
+    # PERF Note: Only need the last bit to check iseven, and default iseven(Int256)
+    # allocates, so we truncate first.
+    if iseven((divisor % Int8)) && remainder == halfdivisor
         # `:NearestTiesAway` will tie away from zero, e.g. -8.5 ->
         # -9. `:NearestTiesUp` will always ties towards positive
         # infinity. `:Nearest` will tie towards the nearest even
@@ -188,7 +207,7 @@ _round_to_nearest(q, r, d, m=RoundNearest) = _round_to_nearest(promote(q, r, d).
 # correctness test suite.
 function Base.:*(x::FD{T, f}, y::FD{T, f}) where {T, f}
     powt = coefficient(FD{T, f})
-    quotient, remainder = fldmodinline(widemul(x.i, y.i), powt)
+    quotient, remainder = fldmodinline(_widemul(x.i, y.i), powt)
     reinterpret(FD{T, f}, _round_to_nearest(quotient, remainder, powt))
 end
 
@@ -416,7 +435,7 @@ function Base.checked_sub(x::T, y::T) where {T<:FD}
 end
 function Base.checked_mul(x::FD{T,f}, y::FD{T,f}) where {T<:Integer,f}
     powt = coefficient(FD{T, f})
-    quotient, remainder = fldmodinline(widemul(x.i, y.i), powt)
+    quotient, remainder = fldmodinline(_widemul(x.i, y.i), powt)
     v = _round_to_nearest(quotient, remainder, powt)
     typemin(T) <= v <= typemax(T) || Base.Checked.throw_overflowerr_binaryop(:*, x, y)
     return reinterpret(FD{T, f}, T(v))
@@ -474,7 +493,7 @@ checked_rdiv(x::FD, y::FD) = checked_rdiv(promote(x, y)...)
 
 function checked_rdiv(x::FD{T,f}, y::FD{T,f}) where {T<:Integer,f}
     powt = coefficient(FD{T, f})
-    quotient, remainder = fldmod(widemul(x.i, powt), y.i)
+    quotient, remainder = fldmod(_widemul(x.i, powt), y.i)
     v = _round_to_nearest(quotient, remainder, y.i)
     typemin(T) <= v <= typemax(T) || Base.Checked.throw_overflowerr_binaryop(:/, x, y)
     return reinterpret(FD{T, f}, v)
@@ -484,8 +503,8 @@ end
 # FixedDecimal.
 function checked_rdiv(x::Integer, y::FD{T, f}) where {T<:Integer, f}
     powt = coefficient(FD{T, f})
-    powtsq = widemul(powt, powt)
-    quotient, remainder = fldmod(widemul(x, powtsq), y.i)
+    powtsq = _widemul(powt, powt)
+    quotient, remainder = fldmod(_widemul(x, powtsq), y.i)
     v = _round_to_nearest(quotient, remainder, y.i)
     typemin(T) <= v <= typemax(T) || Base.Checked.throw_overflowerr_binaryop(:/, x, y)
     reinterpret(FD{T, f}, v)
@@ -722,7 +741,7 @@ NOTE: This function is expensive, since it contains a while-loop, but it is actu
       This function does not have or depend on any side-effects.
 """
 function max_exp10(::Type{T}) where {T <: Integer}
-    W = widen(T)
+    W = _widen(T)
     type_max = W(typemax(T))
 
     powt = one(W)
@@ -759,4 +778,4 @@ value(fd::FD) = fd.i
 # for generic hashing
 Base.decompose(fd::FD) = decompose(Rational(fd))
 
-end
+end  # module