From 08a170d3bdb961728b1e10e3a4d04ae7f9d2a103 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Tue, 16 Jul 2024 11:02:30 -0500 Subject: [PATCH 01/17] generate floats using lexographically ordered encoding This uses the same encoding as `hypothesis` to give floats better shrinking properties. --- src/data.jl | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++- src/util.jl | 3 ++ 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/src/data.jl b/src/data.jl index 39a3121..ae352b9 100644 --- a/src/data.jl +++ b/src/data.jl @@ -39,7 +39,7 @@ as well as these utility functions: module Data using Supposition -using Supposition: smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject +using Supposition: smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject, max_exponent, bias, assemble, tear, exposize, fracsize using RequiredInterfaces: @required using StyledStrings: @styled_str using Printf: format, @format_str @@ -1449,9 +1449,118 @@ function Base.show(io::IO, ::MIME"text/plain", f::Floats) E.g. {code:$obj}; {code:isinf}: $inf, {code:isnan}: $nan""") end +""" + exponent_key(T, e) + +A lexographical ordering for floating point exponents. The encoding is taken +from hypothesis. +The ordering is +- non-negative exponents in increasing order +- negative exponents in decreasing order +- the maximum exponent +""" +function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} + if e == max_exponent(T) + return Inf + end + unbiased = float(e) - bias(T) + if unbiased < 0 + 10000 - unbiased + else + unbiased + end +end + +_make_encoding_table(T) = sort( + zero(Supposition.uint(T)):max_exponent(T), + by=x -> exponent_key(T, x)) +const ENCODING_TABLE = Dict( + UInt16 => _make_encoding_table(Float16), + UInt32 => _make_encoding_table(Float32), + UInt64 => _make_encoding_table(Float64)) + + +""" + update_mantissa(exponent, mantissa) + +Encode the mantissa of a floating point number using an encoding with better shrinking. + +""" +function update_mantissa(::Type{T}, exponent::iT, mantissa::iT)::iT where {T<:Base.IEEEFloat,iT<:Unsigned} + @assert Supposition.uint(T) == iT + # The unbiased exponent is <= 0 + if exponent <= bias(T) + # reverse the bits of the mantissa in place + bitreverse(mantissa) >> (exposize(T) + 1) + elseif exponent >= fracsize(T) + bias(T) + mantissa + else + # reverse the low bits of the fractional part + # as determined by the exponent + n_reverse_bits = fracsize(T) + bias(T) - exponent + # isolate the bits to be reversed + to_reverse = mantissa & iT((1 << n_reverse_bits) - 1) + # zero them out + mantissa = mantissa ⊻ to_reverse + # reverse them and put them back in place + mantissa |= bitreverse(to_reverse) >> (8 * sizeof(T) - n_reverse_bits) + end +end + + +""" + lexographical_float(T, bits) + +Reinterpret the bits of a floating point number using an encoding with better shrinking +properties. +This produces a non-negative floating point number, possibly including NaN or Inf. + +The encoding is taken from hypothesis, and has the property that lexicographically smaller +bit patterns corespond to 'simpler' floats. 
+ +# Encoding + +The encoding used is as follows: + +If the sign bit is set: + + - the remainder of the first byte is ignored + - the remaining bytes are interpreted as an integer and converted to a float + +If the sign bit is not set: + + - the exponent is decoded using `decode_exponent` + - the mantissa is updated using `update_mantissa` + - the float is reassembled using `assemble` + +""" +function lexographical_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} + sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bidwidth of `I`!")) + iT = Supposition.uint(T) + sign, exponent, mantissa = tear(reinterpret(T, bits)) + if sign == 1 + exponent = ENCODING_TABLE[iT][exponent+1] + mantissa = update_mantissa(T, exponent, mantissa) + assemble(T, zero(iT), exponent, mantissa) + else + integral_mask = iT((1 << (8 * (sizeof(T) - 1))) - 1) + integral_part = bits & integral_mask + T(integral_part) + end +end + function produce!(tc::TestCase, f::Floats{T}) where {T} iT = Supposition.uint(T) - res = reinterpret(T, produce!(tc, Integers{iT}())) + + bits = produce!(tc, Integers{iT}()) + + is_negative = produce!(tc, Booleans()) + + res = lexographical_float(T, bits) + if is_negative + res = -res + end + # early rejections !f.infs && isinf(res) && reject(tc) !f.nans && isnan(res) && reject(tc) diff --git a/src/util.jl b/src/util.jl index 86012a2..1e1402f 100644 --- a/src/util.jl +++ b/src/util.jl @@ -26,6 +26,9 @@ exposize(::Type{Float16}) = 5 exposize(::Type{Float32}) = 8 exposize(::Type{Float64}) = 11 +max_exponent(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << exposize(T) - 1) +bias(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << (exposize(T) - 1) - 1) + function masks(::Type{T}) where T <: Base.IEEEFloat ui = uint(T) signbitmask = one(ui) << (8*sizeof(ui)-1) From 1df83c0c1374c9b51e0923258506125c7a200fd3 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Wed, 17 Jul 2024 09:26:26 -0500 Subject: [PATCH 02/17] add tests for encoding of float exponent --- src/Supposition.jl | 1 + src/data.jl | 102 +------------------------------ src/float.jl | 147 +++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 21 ++++++- 4 files changed, 170 insertions(+), 101 deletions(-) create mode 100644 src/float.jl diff --git a/src/Supposition.jl b/src/Supposition.jl index a5ae9b7..012dca2 100644 --- a/src/Supposition.jl +++ b/src/Supposition.jl @@ -35,6 +35,7 @@ using StyledStrings include("types.jl") include("testcase.jl") include("util.jl") +include("float.jl") include("data.jl") include("teststate.jl") include("shrink.jl") diff --git a/src/data.jl b/src/data.jl index ae352b9..ba3fce0 100644 --- a/src/data.jl +++ b/src/data.jl @@ -39,7 +39,8 @@ as well as these utility functions: module Data using Supposition -using Supposition: smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject, max_exponent, bias, assemble, tear, exposize, fracsize +using Supposition: smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject +using Supposition.FloatEncoding: lexographical_float using RequiredInterfaces: @required using StyledStrings: @styled_str using Printf: format, @format_str @@ -1449,105 +1450,6 @@ function Base.show(io::IO, ::MIME"text/plain", f::Floats) E.g. {code:$obj}; {code:isinf}: $inf, {code:isnan}: $nan""") end -""" - exponent_key(T, e) - -A lexographical ordering for floating point exponents. The encoding is taken -from hypothesis. 
-The ordering is -- non-negative exponents in increasing order -- negative exponents in decreasing order -- the maximum exponent -""" -function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} - if e == max_exponent(T) - return Inf - end - unbiased = float(e) - bias(T) - if unbiased < 0 - 10000 - unbiased - else - unbiased - end -end - -_make_encoding_table(T) = sort( - zero(Supposition.uint(T)):max_exponent(T), - by=x -> exponent_key(T, x)) -const ENCODING_TABLE = Dict( - UInt16 => _make_encoding_table(Float16), - UInt32 => _make_encoding_table(Float32), - UInt64 => _make_encoding_table(Float64)) - - -""" - update_mantissa(exponent, mantissa) - -Encode the mantissa of a floating point number using an encoding with better shrinking. - -""" -function update_mantissa(::Type{T}, exponent::iT, mantissa::iT)::iT where {T<:Base.IEEEFloat,iT<:Unsigned} - @assert Supposition.uint(T) == iT - # The unbiased exponent is <= 0 - if exponent <= bias(T) - # reverse the bits of the mantissa in place - bitreverse(mantissa) >> (exposize(T) + 1) - elseif exponent >= fracsize(T) + bias(T) - mantissa - else - # reverse the low bits of the fractional part - # as determined by the exponent - n_reverse_bits = fracsize(T) + bias(T) - exponent - # isolate the bits to be reversed - to_reverse = mantissa & iT((1 << n_reverse_bits) - 1) - # zero them out - mantissa = mantissa ⊻ to_reverse - # reverse them and put them back in place - mantissa |= bitreverse(to_reverse) >> (8 * sizeof(T) - n_reverse_bits) - end -end - - -""" - lexographical_float(T, bits) - -Reinterpret the bits of a floating point number using an encoding with better shrinking -properties. -This produces a non-negative floating point number, possibly including NaN or Inf. - -The encoding is taken from hypothesis, and has the property that lexicographically smaller -bit patterns corespond to 'simpler' floats. - -# Encoding - -The encoding used is as follows: - -If the sign bit is set: - - - the remainder of the first byte is ignored - - the remaining bytes are interpreted as an integer and converted to a float - -If the sign bit is not set: - - - the exponent is decoded using `decode_exponent` - - the mantissa is updated using `update_mantissa` - - the float is reassembled using `assemble` - -""" -function lexographical_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} - sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bidwidth of `I`!")) - iT = Supposition.uint(T) - sign, exponent, mantissa = tear(reinterpret(T, bits)) - if sign == 1 - exponent = ENCODING_TABLE[iT][exponent+1] - mantissa = update_mantissa(T, exponent, mantissa) - assemble(T, zero(iT), exponent, mantissa) - else - integral_mask = iT((1 << (8 * (sizeof(T) - 1))) - 1) - integral_part = bits & integral_mask - T(integral_part) - end -end function produce!(tc::TestCase, f::Floats{T}) where {T} iT = Supposition.uint(T) diff --git a/src/float.jl b/src/float.jl new file mode 100644 index 0000000..8704e64 --- /dev/null +++ b/src/float.jl @@ -0,0 +1,147 @@ +module FloatEncoding +using Supposition: uint, tear, bias, fracsize, exposize, max_exponent, assemble + +""" + exponent_key(T, e) + +A lexographical ordering for floating point exponents. The encoding is taken +from hypothesis. 
+The ordering is +- non-negative exponents in increasing order +- negative exponents in decreasing order +- the maximum exponent +""" +function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} + if e == max_exponent(T) + return Inf + end + unbiased = float(e) - bias(T) + if unbiased < 0 + 10000 - unbiased + else + unbiased + end +end + +_make_encoding_table(T) = sort( + zero(uint(T)):max_exponent(T), + by=Base.Fix1(exponent_key, T)) +const ENCODING_TABLE = Dict( + UInt16 => _make_encoding_table(Float16), + UInt32 => _make_encoding_table(Float32), + UInt64 => _make_encoding_table(Float64)) + +encode_exponent(e::T) where {T<:Unsigned} = ENCODING_TABLE[T][e+1] + +function _make_decoding_table(T) + decoding_table = zeros(uint(T), max_exponent(T) + 1) + for (i, e) in enumerate(ENCODING_TABLE[uint(T)]) + decoding_table[e+1] = i - 1 + end + decoding_table +end +const DECODING_TABLE = Dict( + UInt16 => _make_decoding_table(Float16), + UInt32 => _make_decoding_table(Float32), + UInt64 => _make_decoding_table(Float64)) +decode_exponent(e::T) where {T<:Unsigned} = DECODING_TABLE[T][e+1] + + +""" + update_mantissa(exponent, mantissa) + +Encode the mantissa of a floating point number using an encoding with better shrinking. +""" +function update_mantissa(::Type{T}, exponent::iT, mantissa::iT)::iT where {T<:Base.IEEEFloat,iT<:Unsigned} + @assert uint(T) == iT + # The unbiased exponent is <= 0 + if exponent <= bias(T) + # reverse the bits of the mantissa in place + bitreverse(mantissa) >> (exposize(T) + 1) + elseif exponent >= fracsize(T) + bias(T) + mantissa + else + # reverse the low bits of the fractional part + # as determined by the exponent + n_reverse_bits = fracsize(T) + bias(T) - exponent + # isolate the bits to be reversed + to_reverse = mantissa & iT((1 << n_reverse_bits) - 1) + # zero them out + mantissa = mantissa ⊻ to_reverse + # reverse them and put them back in place + mantissa |= bitreverse(to_reverse) >> (8 * sizeof(T) - n_reverse_bits) + end +end + + +""" + lexographical_float(T, bits) + +Reinterpret the bits of a floating point number using an encoding with better shrinking +properties. +This produces a non-negative floating point number, possibly including NaN or Inf. + +The encoding is taken from hypothesis, and has the property that lexicographically smaller +bit patterns corespond to 'simpler' floats. 
+ +# Encoding + +The encoding used is as follows: + +If the sign bit is set: + + - the remainder of the first byte is ignored + - the remaining bytes are interpreted as an integer and converted to a float + +If the sign bit is not set: + + - the exponent is decoded using `decode_exponent` + - the mantissa is updated using `update_mantissa` + - the float is reassembled using `assemble` + +""" +function lexographical_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} + sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bidwidth of `I`!")) + iT = uint(T) + sign, exponent, mantissa = tear(reinterpret(T, bits)) + if sign == 1 + exponent = encode_exponent(exponent) + mantissa = update_mantissa(T, exponent, mantissa) + assemble(T, zero(iT), exponent, mantissa) + else + integral_mask = iT((1 << (8 * (sizeof(T) - 1))) - 1) + integral_part = bits & integral_mask + T(integral_part) + end +end + +function float_to_lex(f::T) where {T<:Base.IEEEFloat} + if is_simple_float(f) + uint(T)(f) + else + base_float_to_lex(f) + end +end + +function is_simple_float(f::T) where {T<:Base.IEEEFloat} + try + if trunc(f) != f + return false + end + Base.top_set_bit(uint(f)) <= 8 * (sizeof(T) - 1) + catch e + if isa(e, InexactError) + return false + end + rethrow(e) + end +end + +function base_float_to_lex(f::T) where {T<:Base.IEEEFloat} + sign, exponent, mantissa = tear(f) + mantissa = update_mantissa(T, exponent, mantissa) + exponent = decode_exponent(exponent) + + reinterpret(uint(T), assemble(T, sign, exponent, mantissa)) +end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 166c92a..44f10ca 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,5 @@ using Supposition -using Supposition: Data, test_function, shrink_remove, shrink_redistribute, +using Supposition: Data, FloatEncoding, test_function, shrink_remove, shrink_redistribute, NoRecordDB, UnsetDB, Attempt, DEFAULT_CONFIG, TestCase, TestState, choice!, weighted! using Test using Aqua @@ -513,6 +513,25 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 @test_throws ArgumentError Data.Floats(;minimum=2.0, maximum=1.0) end + # Tests the properties of the enocding used to represent floating point numbers + @testset "Floating point encoding" begin + @testset for T in (Float16, Float32, Float64) + # These invariants are ported from Hypothesis + @testset "Exponent encoding" begin + exponents = zero(Supposition.uint(T)):Supposition.max_exponent(T) + + # Round tripping + @test all(exponents) do e + FloatEncoding.decode_exponent(FloatEncoding.encode_exponent(e)) == e + end + + @test all(exponents) do e + FloatEncoding.encode_exponent(FloatEncoding.decode_exponent(e)) == e + end + end + end + end + @testset "@check API" begin # These tests are for accepted syntax, not functionality, so only one example is fine API_conf = Supposition.merge(DEFAULT_CONFIG[]; verbose=verb, max_examples=1) From e58ac23889509e37731d26ca8600d25dc4b308ab Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Wed, 17 Jul 2024 12:24:24 -0500 Subject: [PATCH 03/17] port float encoding tests from hypothesis This just ports the tests that check the ordering properties of the encoding and fixes the bugs that were found by the tests. 
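
The properties being ported come down to one invariant: "simpler" floats (small non-negative integers first, then larger and fractional values) should map to lexicographically smaller bit patterns. A minimal sketch of the kind of invariant these tests assert, using the internal `FloatEncoding` module added in this series (illustrative only, not part of the committed test suite):

```julia
# Illustrative sketch of the ordering invariants checked by the ported tests.
using Supposition: FloatEncoding

# Integral floats should order the same way as the integers they represent ...
xs = Float64.(0:10)
@assert issorted(FloatEncoding.float_to_lex.(xs))

# ... and a fractional float is "less simple" than 1.0, so it encodes larger.
@assert FloatEncoding.float_to_lex(0.5) > FloatEncoding.float_to_lex(1.0)
```
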
--- src/data.jl | 4 ++-- src/float.jl | 8 +++---- test/runtests.jl | 58 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 63 insertions(+), 7 deletions(-) diff --git a/src/data.jl b/src/data.jl index ba3fce0..3fce06f 100644 --- a/src/data.jl +++ b/src/data.jl @@ -40,7 +40,7 @@ module Data using Supposition using Supposition: smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject -using Supposition.FloatEncoding: lexographical_float +using Supposition.FloatEncoding: lex_to_float using RequiredInterfaces: @required using StyledStrings: @styled_str using Printf: format, @format_str @@ -1458,7 +1458,7 @@ function produce!(tc::TestCase, f::Floats{T}) where {T} is_negative = produce!(tc, Booleans()) - res = lexographical_float(T, bits) + res = lex_to_float(T, bits) if is_negative res = -res end diff --git a/src/float.jl b/src/float.jl index 8704e64..a8e6eaa 100644 --- a/src/float.jl +++ b/src/float.jl @@ -75,7 +75,7 @@ end """ - lexographical_float(T, bits) + lex_to_float(T, bits) Reinterpret the bits of a floating point number using an encoding with better shrinking properties. @@ -100,7 +100,7 @@ If the sign bit is not set: - the float is reassembled using `assemble` """ -function lexographical_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} +function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bidwidth of `I`!")) iT = uint(T) sign, exponent, mantissa = tear(reinterpret(T, bits)) @@ -128,7 +128,7 @@ function is_simple_float(f::T) where {T<:Base.IEEEFloat} if trunc(f) != f return false end - Base.top_set_bit(uint(f)) <= 8 * (sizeof(T) - 1) + Base.top_set_bit(reinterpret(uint(T), f)) <= 8 * (sizeof(T) - 1) catch e if isa(e, InexactError) return false @@ -142,6 +142,6 @@ function base_float_to_lex(f::T) where {T<:Base.IEEEFloat} mantissa = update_mantissa(T, exponent, mantissa) exponent = decode_exponent(exponent) - reinterpret(uint(T), assemble(T, sign, exponent, mantissa)) + reinterpret(uint(T), assemble(T, one(uint(T)), exponent, mantissa)) end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 44f10ca..8427069 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -516,9 +516,11 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 # Tests the properties of the enocding used to represent floating point numbers @testset "Floating point encoding" begin @testset for T in (Float16, Float32, Float64) + + iT = Supposition.uint(T) # These invariants are ported from Hypothesis @testset "Exponent encoding" begin - exponents = zero(Supposition.uint(T)):Supposition.max_exponent(T) + exponents = zero(iT):Supposition.max_exponent(T) # Round tripping @test all(exponents) do e @@ -529,6 +531,60 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 FloatEncoding.encode_exponent(FloatEncoding.decode_exponent(e)) == e end end + + function roundtrip_encoding(f) + assume!(!signbit(f)) + encoded = FloatEncoding.float_to_lex(f) + decoded = FloatEncoding.lex_to_float(T, encoded) + reinterpret(iT, decoded) == reinterpret(iT, f) + end + + roundtrip_examples = map(Data.Just, + T[ + 0.0, + 2.5, + 8.000000000000007, + 3.0, + 2.0, + 1.9999999999999998, + 1.0 + ]) + @check roundtrip_encoding(Data.OneOf(roundtrip_examples...)) + @check roundtrip_encoding(Data.Floats{T}(; minimum=zero(T))) + + @testset "Ordering" begin + function order_integral_part(n, g) + f = n + g + assume!(trunc(f) != f) + assume!(trunc(f) != 0) + i = 
FloatEncoding.float_to_lex(f) + g = trunc(f) + FloatEncoding.float_to_lex(g) < i + end + + @check order_integral_part(Data.Just(1.0), Data.Just(0.5)) + @check order_integral_part( + Data.Floats{T}(; + minimum=one(T), + maximum=T(2^(Supposition.fracsize(T) + 1)), + nans=false), + filter(x -> !(x in T[0, 1]), + Data.Floats{T}(; minimum=zero(T), maximum=one(T), nans=false))) + + integral_float_gen = map(abs ∘ trunc, + Data.Floats{T}(; minimum=zero(T), infs=false, nans=false)) + + @check function integral_floats_order_as_integers(x=integral_float_gen, + y=integral_float_gen) + (x < y) == (FloatEncoding.float_to_lex(x) < FloatEncoding.float_to_lex(y)) + end + + @check function fractional_floats_greater_than_1( + f=Data.Floats{T}(; minimum=zero(T), maximum=one(T), nans=false)) + assume!(0 < f < 1) + FloatEncoding.float_to_lex(f) > FloatEncoding.float_to_lex(one(T)) + end + end end end From 4ecb381f0bdc5261eaa1670f873280a79bbadeeb Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Wed, 17 Jul 2024 13:46:35 -0500 Subject: [PATCH 04/17] replace top_set_bit to be compatible with older julia --- src/float.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/float.jl b/src/float.jl index a8e6eaa..2f6e9b5 100644 --- a/src/float.jl +++ b/src/float.jl @@ -128,7 +128,7 @@ function is_simple_float(f::T) where {T<:Base.IEEEFloat} if trunc(f) != f return false end - Base.top_set_bit(reinterpret(uint(T), f)) <= 8 * (sizeof(T) - 1) + ndigits(reinterpret(uint(T), f), base=2) <= 8 * (sizeof(T) - 1) catch e if isa(e, InexactError) return false @@ -138,7 +138,7 @@ function is_simple_float(f::T) where {T<:Base.IEEEFloat} end function base_float_to_lex(f::T) where {T<:Base.IEEEFloat} - sign, exponent, mantissa = tear(f) + _, exponent, mantissa = tear(f) mantissa = update_mantissa(T, exponent, mantissa) exponent = decode_exponent(exponent) From 822574efad146e71e70245521ffc1e7e99a82458 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 09:51:42 -0500 Subject: [PATCH 05/17] docstring and formatting changes from code review Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- src/float.jl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/float.jl b/src/float.jl index 2f6e9b5..9fd5951 100644 --- a/src/float.jl +++ b/src/float.jl @@ -23,9 +23,8 @@ function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} end end -_make_encoding_table(T) = sort( - zero(uint(T)):max_exponent(T), - by=Base.Fix1(exponent_key, T)) +_make_encoding_table(T) = sort(zero(uint(T)):max_exponent(T); + by = Base.Fix1(exponent_key, T)) const ENCODING_TABLE = Dict( UInt16 => _make_encoding_table(Float16), UInt32 => _make_encoding_table(Float32), @@ -79,10 +78,10 @@ end Reinterpret the bits of a floating point number using an encoding with better shrinking properties. -This produces a non-negative floating point number, possibly including NaN or Inf. +This produces a non-negative floating point number, possibly including `NaN` or `Inf`. -The encoding is taken from hypothesis, and has the property that lexicographically smaller -bit patterns corespond to 'simpler' floats. +The encoding is ported from hypothesis, and has the property that lexicographically smaller +bit patterns correspond to 'simpler' floats. 
# Encoding @@ -101,7 +100,7 @@ If the sign bit is not set: """ function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} - sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bidwidth of `I`!")) + sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bitwidth of the given bits!")) iT = uint(T) sign, exponent, mantissa = tear(reinterpret(T, bits)) if sign == 1 From fb8907cda1d2cb502e92ca974729170c9cadd491 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 09:55:16 -0500 Subject: [PATCH 06/17] optimizations from code review Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- src/float.jl | 7 ++++--- src/util.jl | 2 +- test/runtests.jl | 3 +-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/float.jl b/src/float.jl index 9fd5951..d771199 100644 --- a/src/float.jl +++ b/src/float.jl @@ -103,12 +103,12 @@ function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bitwidth of the given bits!")) iT = uint(T) sign, exponent, mantissa = tear(reinterpret(T, bits)) - if sign == 1 + if isone(sign) exponent = encode_exponent(exponent) mantissa = update_mantissa(T, exponent, mantissa) assemble(T, zero(iT), exponent, mantissa) else - integral_mask = iT((1 << (8 * (sizeof(T) - 1))) - 1) + integral_mask = iT(-1) >>> 0x8 integral_part = bits & integral_mask T(integral_part) end @@ -127,7 +127,8 @@ function is_simple_float(f::T) where {T<:Base.IEEEFloat} if trunc(f) != f return false end - ndigits(reinterpret(uint(T), f), base=2) <= 8 * (sizeof(T) - 1) + # In the encoding, the float is simple if the first byte is all zeros + leading_zeros(reinterpret(uint(T), f)) >= 8 catch e if isa(e, InexactError) return false diff --git a/src/util.jl b/src/util.jl index 1e1402f..768e32b 100644 --- a/src/util.jl +++ b/src/util.jl @@ -26,7 +26,7 @@ exposize(::Type{Float16}) = 5 exposize(::Type{Float32}) = 8 exposize(::Type{Float64}) = 11 -max_exponent(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << exposize(T) - 1) +max_exponent(::Type{T}) where {T<:Base.IEEEFloat} = (1 << exposize(T) - 1) % uint(T) bias(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << (exposize(T) - 1) - 1) function masks(::Type{T}) where T <: Base.IEEEFloat diff --git a/test/runtests.jl b/test/runtests.jl index 8427069..51ed347 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -568,8 +568,7 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 minimum=one(T), maximum=T(2^(Supposition.fracsize(T) + 1)), nans=false), - filter(x -> !(x in T[0, 1]), - Data.Floats{T}(; minimum=zero(T), maximum=one(T), nans=false))) + Data.Floats{T}(; minimum=nextfloat(zero(T)), maximum=prevfloat(one(T)), nans=false)) integral_float_gen = map(abs ∘ trunc, Data.Floats{T}(; minimum=zero(T), infs=false, nans=false)) From fb2b833fb5e63e4beec3f548a381a0763ac15891 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 09:55:44 -0500 Subject: [PATCH 07/17] improve formatting of docstring Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- src/float.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/float.jl b/src/float.jl index d771199..5135346 100644 --- a/src/float.jl +++ b/src/float.jl @@ -4,12 +4,13 @@ using Supposition: uint, tear, bias, fracsize, exposize, max_exponent, assemble """ exponent_key(T, e) -A lexographical ordering for floating point exponents. 
The encoding is taken +A lexicographical ordering for floating point exponents. The encoding is ported from hypothesis. + The ordering is -- non-negative exponents in increasing order -- negative exponents in decreasing order -- the maximum exponent + * non-negative exponents in increasing order + * negative exponents in decreasing order + * the maximum exponent """ function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} if e == max_exponent(T) From d1ece54745744cc3b081deb7925005c94e42c9c6 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 10:06:59 -0500 Subject: [PATCH 08/17] explain exponent encoding tests Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- test/runtests.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 51ed347..8e36ab9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -520,6 +520,8 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 iT = Supposition.uint(T) # These invariants are ported from Hypothesis @testset "Exponent encoding" begin + # The highest value here is 0x7ff (2047) with Int64, + # so we can just exhaustively test all 2048 possibilities exponents = zero(iT):Supposition.max_exponent(T) # Round tripping From 9bfb8f53a235360952f5e7ee51f1a6068d3a1daf Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 10:44:30 -0500 Subject: [PATCH 09/17] fix calculation of integral_mask --- src/float.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/float.jl b/src/float.jl index 5135346..9d223be 100644 --- a/src/float.jl +++ b/src/float.jl @@ -109,7 +109,7 @@ function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} mantissa = update_mantissa(T, exponent, mantissa) assemble(T, zero(iT), exponent, mantissa) else - integral_mask = iT(-1) >>> 0x8 + integral_mask = signed(iT)(-1) >>> 0x8 integral_part = bits & integral_mask T(integral_part) end From c207dc9dc00f7c70a62605960792c1dce3a3120b Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 11:22:53 -0500 Subject: [PATCH 10/17] add documentation for float encodings --- src/float.jl | 147 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 128 insertions(+), 19 deletions(-) diff --git a/src/float.jl b/src/float.jl index 9d223be..ae22798 100644 --- a/src/float.jl +++ b/src/float.jl @@ -5,12 +5,17 @@ using Supposition: uint, tear, bias, fracsize, exposize, max_exponent, assemble exponent_key(T, e) A lexicographical ordering for floating point exponents. The encoding is ported -from hypothesis. +from Hypothesis. The ordering is + * non-negative exponents in increasing order * negative exponents in decreasing order * the maximum exponent + +# Extended Help + +The [reference implementation](https://github.com/HypothesisWorks/hypothesis/blob/aad70fb2d9dec2cef9719cdf5369eec9fae0d2a4/hypothesis-python/src/hypothesis/internal/conjecture/floats.py#L82) in Hypothesis. 
""" function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} if e == max_exponent(T) @@ -18,21 +23,61 @@ function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} end unbiased = float(e) - bias(T) if unbiased < 0 - 10000 - unbiased + # order all negative exponents after the positive ones + # in reverse order + # max_exponent(T) - 1 maps to the key bias(T) + # so the first negative exponent maps to bias(T) + 1 + bias(T) - unbiased else unbiased end end +""" + _make_encoding_table(T) + +Build a look up table for encoding exponents of floating point numbers of type `T`. +For a floating point type `T`, the lookup table is a permutation of the unsigned integers of type [`uint`](@ref) from `0` to `max_exponent(T)`. + +This allows the reordering of the exponent bits of a floating point number according to the encoding described in [`exponent_key`](@ref). +""" _make_encoding_table(T) = sort(zero(uint(T)):max_exponent(T); by = Base.Fix1(exponent_key, T)) +""" + ENCODING_TABLE + +A dictionary mapping `Unsigned` types to encoding tables for exponents. +The encoding is described in [`exponent_key`](@ref) and is ported from Hypothesis. + +# See Also + +[`encode_exponent`](@ref) +[`DECODING_TABLE`](@ref) +""" const ENCODING_TABLE = Dict( UInt16 => _make_encoding_table(Float16), UInt32 => _make_encoding_table(Float32), UInt64 => _make_encoding_table(Float64)) +""" + encode_exponent(e) + +Encode the exponent of a floating point number using an encoding with better shrinking. +The exponent can be extracted from a floating point number `f` using [`tear`](@ref). + +# See Also + +[`ENCODING_TABLE`](@ref) +[`exponent_key`](@ref) +[`tear`](@ref) +""" encode_exponent(e::T) where {T<:Unsigned} = ENCODING_TABLE[T][e+1] +""" + _make_decoding_table(T) + +Build a look up table for decoding exponents of floating point numbers of type `T` which is the inverse of the table built by [`_make_encoding_table`](@ref). +""" function _make_decoding_table(T) decoding_table = zeros(uint(T), max_exponent(T) + 1) for (i, e) in enumerate(ENCODING_TABLE[uint(T)]) @@ -40,10 +85,28 @@ function _make_decoding_table(T) end decoding_table end + +""" + DECODING_TABLE + +A dictionary mapping `Unsigned` types to decoding tables for exponents. +The encoding is described in [`exponent_key`](@ref) and is ported from Hypothesis. + +# See Also + +[`decode_exponent`](@ref) +[`ENCODING_TABLE`](@ref) +""" const DECODING_TABLE = Dict( UInt16 => _make_decoding_table(Float16), UInt32 => _make_decoding_table(Float32), UInt64 => _make_decoding_table(Float64)) + +""" + decode_exponent(e) + +Undoes the encoding of the exponent of a floating point number used by [`encode_exponent`](@ref). +""" decode_exponent(e::T) where {T<:Unsigned} = DECODING_TABLE[T][e+1] @@ -51,6 +114,17 @@ decode_exponent(e::T) where {T<:Unsigned} = DECODING_TABLE[T][e+1] update_mantissa(exponent, mantissa) Encode the mantissa of a floating point number using an encoding with better shrinking. +The encoding is ported from Hypothesis. 
+ +The encoding is as follows: + + * If the unbiased exponent is <= 0, reverse the bits of the mantissa + * If the unbiased exponent is >= fracsize(T) + bias(T), do nothing + * Otherwise, reverse the low bits of the fractional part + +# Extended help + +See the [reference implementation](https://github.com/HypothesisWorks/hypothesis/blob/aad70fb2d9dec2cef9719cdf5369eec9fae0d2a4/hypothesis-python/src/hypothesis/internal/conjecture/floats.py#L165) in hypothesis """ function update_mantissa(::Type{T}, exponent::iT, mantissa::iT)::iT where {T<:Base.IEEEFloat,iT<:Unsigned} @assert uint(T) == iT @@ -81,7 +155,7 @@ Reinterpret the bits of a floating point number using an encoding with better sh properties. This produces a non-negative floating point number, possibly including `NaN` or `Inf`. -The encoding is ported from hypothesis, and has the property that lexicographically smaller +The encoding is ported from Hypothesis, and has the property that lexicographically smaller bit patterns correspond to 'simpler' floats. # Encoding @@ -95,9 +169,13 @@ If the sign bit is set: If the sign bit is not set: - - the exponent is decoded using `decode_exponent` - - the mantissa is updated using `update_mantissa` - - the float is reassembled using `assemble` + - the exponent is encoded using [`encoded_exponent`](@ref) + - the mantissa is updated using [`update_mantissa`](@ref) + - the float is reassembled using [`assemble`](@ref) + +## Extended Help + +See the [reference implementation](https://github.com/HypothesisWorks/hypothesis/blob/aad70fb2d9dec2cef9719cdf5369eec9fae0d2a4/hypothesis-python/src/hypothesis/internal/conjecture/floats.py#L176) in Hypothesis. """ function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} @@ -115,30 +193,61 @@ function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} end end +""" + float_to_lex(f) + +Encoding a floating point number as a bit pattern. + +This is essentially the inverse of [`lex_to_float`](@ref) and produces a bit pattern that is lexicographically +smaller for 'simpler' floats. + +Note that while `lex_to_float` can produce any valid positive floating point number, it is not injective. So combined with the fact that positive and negative floats map to the same bit pattern, +`float_to_lex` is not an exact inverse of `lex_to_float`. +""" function float_to_lex(f::T) where {T<:Base.IEEEFloat} + # If the float is simple, we can just reinterpret it as an integer + # This corresponds to the latter branch of lex_to_float if is_simple_float(f) uint(T)(f) else - base_float_to_lex(f) + nonsimple_float_to_lex(f) end end +""" + is_simple_float(f) + +`f` is simple if it is integral and the first byte is all zeros. +""" function is_simple_float(f::T) where {T<:Base.IEEEFloat} - try - if trunc(f) != f - return false - end - # In the encoding, the float is simple if the first byte is all zeros - leading_zeros(reinterpret(uint(T), f)) >= 8 - catch e - if isa(e, InexactError) - return false - end - rethrow(e) + if trunc(f) != f + return false end + # In the encoding, the float is simple if the first byte is all zeros + leading_zeros(reinterpret(uint(T), f)) >= 8 end -function base_float_to_lex(f::T) where {T<:Base.IEEEFloat} +""" + nonsimple_float_to_lex(f) + +Encode a floating point number as a bit pattern, when the float is not simple. 
+ +This is the inverse of [`lex_to_float`](@ref) for bit patterns with the signbit set i.e., + +```jldoctest +julia> using Supposition.FloatEncoding: lex_to_float, nonsimple_float_to_lex + +julia> bits = 0xff00 +0xff00 + +julia> signbit(reinterpret(Float16, bits)) +true + +julia> nonsimple_float_to_lex(lex_to_float(Float16, bits)) == bits +true +``` +""" +function nonsimple_float_to_lex(f::T) where {T<:Base.IEEEFloat} _, exponent, mantissa = tear(f) mantissa = update_mantissa(T, exponent, mantissa) exponent = decode_exponent(exponent) From c8462e4e574b6056e580f64f41ba7cdcc3f783e6 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 11:48:50 -0500 Subject: [PATCH 11/17] replace assertion in update_mantissa --- src/float.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/float.jl b/src/float.jl index ae22798..638b6b3 100644 --- a/src/float.jl +++ b/src/float.jl @@ -127,7 +127,8 @@ The encoding is as follows: See the [reference implementation](https://github.com/HypothesisWorks/hypothesis/blob/aad70fb2d9dec2cef9719cdf5369eec9fae0d2a4/hypothesis-python/src/hypothesis/internal/conjecture/floats.py#L165) in hypothesis """ function update_mantissa(::Type{T}, exponent::iT, mantissa::iT)::iT where {T<:Base.IEEEFloat,iT<:Unsigned} - @assert uint(T) == iT + exponent = convert(uint(T), exponent) + mantissa = convert(uint(T), mantissa) # The unbiased exponent is <= 0 if exponent <= bias(T) # reverse the bits of the mantissa in place From 54b1c8d3dcb0cce5a78df81cc143f573e1880b1e Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 12:03:16 -0500 Subject: [PATCH 12/17] simplify float encoding test code --- test/runtests.jl | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 8e36ab9..3e343cc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -535,14 +535,12 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 end function roundtrip_encoding(f) - assume!(!signbit(f)) encoded = FloatEncoding.float_to_lex(f) decoded = FloatEncoding.lex_to_float(T, encoded) reinterpret(iT, decoded) == reinterpret(iT, f) end - roundtrip_examples = map(Data.Just, - T[ + @testset for f in T[ 0.0, 2.5, 8.000000000000007, @@ -550,9 +548,11 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 2.0, 1.9999999999999998, 1.0 - ]) - @check roundtrip_encoding(Data.OneOf(roundtrip_examples...)) - @check roundtrip_encoding(Data.Floats{T}(; minimum=zero(T))) + ] + @test roundtrip_encoding(f) + end + + @check roundtrip_encoding(Data.Floats{T}(; minimum=nextfloat(zero(T)))) @testset "Ordering" begin function order_integral_part(n, g) @@ -581,8 +581,7 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 end @check function fractional_floats_greater_than_1( - f=Data.Floats{T}(; minimum=zero(T), maximum=one(T), nans=false)) - assume!(0 < f < 1) + f=Data.Floats{T}(; minimum=nextfloat(zero(T)), maximum=prevfloat(one(T)), nans=false)) FloatEncoding.float_to_lex(f) > FloatEncoding.float_to_lex(one(T)) end end From 3d0e21119f9e3d08a1148737d99cfccdbe7d0b84 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 12:21:35 -0500 Subject: [PATCH 13/17] move floating point utilities to FloatEncoding --- src/data.jl | 8 ++-- src/float.jl | 96 +++++++++++++++++++++++++++++++++++++++++++++++- src/util.jl | 54 --------------------------- test/runtests.jl | 8 ++-- 4 files changed, 103 insertions(+), 63 deletions(-) diff --git a/src/data.jl b/src/data.jl index 3fce06f..3b68533 100644 --- a/src/data.jl +++ 
b/src/data.jl @@ -39,7 +39,7 @@ as well as these utility functions: module Data using Supposition -using Supposition: smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject +using Supposition: FloatEncoding, smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject using Supposition.FloatEncoding: lex_to_float using RequiredInterfaces: @required using StyledStrings: @styled_str @@ -1452,7 +1452,7 @@ end function produce!(tc::TestCase, f::Floats{T}) where {T} - iT = Supposition.uint(T) + iT = FloatEncoding.uint(T) bits = produce!(tc, Integers{iT}()) @@ -1478,8 +1478,8 @@ function float_remap(num::T, _min::T, _max::T) where T <: AbstractFloat # We're outside of the desired bounds, so use the mantissa # to resample the actual range range_size = min(_max - _min, floatmax(T)) - _, _, mantissa = Supposition.tear(num) - max_mantissa = oftype(mantissa, (2^Supposition.fracsize(T)) - 1) + _, _, mantissa = FloatEncoding.tear(num) + max_mantissa = oftype(mantissa, (2^FloatEncoding.fracsize(T)) - 1) num = _min + range_size * (mantissa / max_mantissa) # ensure the value is still in the desired range diff --git a/src/float.jl b/src/float.jl index 638b6b3..cee1052 100644 --- a/src/float.jl +++ b/src/float.jl @@ -1,5 +1,99 @@ +""" + FloatEncoding + +This module includes utilities for maniuplating floating point numbers in an IEEE 754 encoding: + +Additionally, it implements an encoding for floating point numbers that has better shrinking properties, ported from hypothesis. See [`lex_to_float`](@ref) for more details. +""" module FloatEncoding -using Supposition: uint, tear, bias, fracsize, exposize, max_exponent, assemble + +""" + uint(::Type{F}) + +Returns the unsigned integer type that can hold the bit pattern of a floating point number of type `F`. +""" +uint(::Type{Float16}) = UInt16 +""" + uint(::F) + +Returns the zero value of the unsigned integer type that can hold the bit pattern of a floating point number of type `F`. +""" +uint(::Float16) = zero(UInt16) +uint(::Type{Float32}) = UInt32 +uint(::Float32) = zero(UInt32) +uint(::Type{Float64}) = UInt64 +uint(::Float64) = zero(UInt64) + +""" + fracsize(::Type{F}) + +Returns the number of bits in the fractional part of a floating point number of type `F`. +""" +fracsize(::Type{Float16}) = 10 +fracsize(::Type{Float32}) = 23 +fracsize(::Type{Float64}) = 52 + + +""" + exposize(::Type{F}) + +Returns the number of bits in the exponent part of a floating point number of type `F`. +""" +exposize(::Type{Float16}) = 5 +exposize(::Type{Float32}) = 8 +exposize(::Type{Float64}) = 11 + +""" + max_exponent(::Type{F}) + +The maximum value of the exponent bits of a floating point number of type `F`. +""" +max_exponent(::Type{T}) where {T<:Base.IEEEFloat} = (1 << exposize(T) - 1) % uint(T) +""" + bias(::Type{F}) + +The IEEE 754 bias of the exponent bits of a floating point number of type `F`. +""" +bias(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << (exposize(T) - 1) - 1) + +function masks(::Type{T}) where {T<:Base.IEEEFloat} + ui = uint(T) + signbitmask = one(ui) << (8 * sizeof(ui) - 1) + fracbitmask = (-1 % ui) >> (8 * sizeof(ui) - fracsize(T)) + expobitmask = ((-1 % ui) >> (8 * sizeof(ui) - exposize(T))) << fracsize(T) + signbitmask, expobitmask, fracbitmask +end + +""" + assemble(::T, sign::I, expo::I, frac::I) where {I, T <: Union{Float16, Float32, Float64}} -> T + +Assembles `sign`, `expo` and `frac` arguments into the floating point number of type `T` it represents. +`sizeof(T)` must match `sizeof(I)`. 
+""" +function assemble(::Type{T}, sign::I, expo::I, frac::I) where {I,T<:Base.IEEEFloat} + sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the other arguments of type `I`!")) + signmask, expomask, fracmask = masks(T) + sign = (sign << (exposize(T) + fracsize(T))) & signmask + expo = (expo << fracsize(T)) & expomask + frac = frac & fracmask + ret = sign | expo | frac + reinterpret(T, ret) +end + +""" + tear(x::T) where T <: Union{Float16, Float32, Float64} -> Tuple{I, I, I} + +Returns the sign, exponent and fractional parts of a floating point number. +The returned tuple consists of three unsigned integer types `I` of the same bitwidth as `T`. +""" +function tear(x::T) where {T<:Base.IEEEFloat} + signbitmask, expobitmask, fracbitmask = masks(T) + ur = reinterpret(uint(T), x) + s = (ur & signbitmask) >> (exposize(T) + fracsize(T)) + e = (ur & expobitmask) >> fracsize(T) + f = (ur & fracbitmask) >> 0x0 + s, e, f +end """ exponent_key(T, e) diff --git a/src/util.jl b/src/util.jl index 768e32b..ffd38b4 100644 --- a/src/util.jl +++ b/src/util.jl @@ -13,60 +13,6 @@ function windows(array, a,b) head, middle, tail end -uint(::Type{Float16}) = UInt16 -uint(:: Float16) = zero(UInt16) -uint(::Type{Float32}) = UInt32 -uint(:: Float32) = zero(UInt32) -uint(::Type{Float64}) = UInt64 -uint(:: Float64) = zero(UInt64) -fracsize(::Type{Float16}) = 10 -fracsize(::Type{Float32}) = 23 -fracsize(::Type{Float64}) = 52 -exposize(::Type{Float16}) = 5 -exposize(::Type{Float32}) = 8 -exposize(::Type{Float64}) = 11 - -max_exponent(::Type{T}) where {T<:Base.IEEEFloat} = (1 << exposize(T) - 1) % uint(T) -bias(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << (exposize(T) - 1) - 1) - -function masks(::Type{T}) where T <: Base.IEEEFloat - ui = uint(T) - signbitmask = one(ui) << (8*sizeof(ui)-1) - fracbitmask = (-1 % ui) >> (8*sizeof(ui)-fracsize(T)) - expobitmask = ((-1 % ui) >> (8*sizeof(ui)-exposize(T))) << fracsize(T) - signbitmask, expobitmask, fracbitmask -end - -""" - assemble(::T, sign::I, expo::I, frac::I) where {I, T <: Union{Float16, Float32, Float64}} -> T - -Assembles `sign`, `expo` and `frac` arguments into the floating point number of type `T` it represents. -`sizeof(T)` must match `sizeof(I)`. -""" -function assemble(::Type{T}, sign::I, expo::I, frac::I) where {I, T <: Base.IEEEFloat} - sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the other arguments of type `I`!")) - signmask, expomask, fracmask = masks(T) - sign = (sign << (exposize(T) + fracsize(T))) & signmask - expo = (expo << fracsize(T)) & expomask - frac = frac & fracmask - ret = sign | expo | frac - return reinterpret(T, ret) -end - -""" - tear(x::T) where T <: Union{Float16, Float32, Float64} -> Tuple{I, I, I} - -Returns the sign, exponent and fractional parts of a floating point number. -The returned tuple consists of three unsigned integer types `I` of the same bitwidth as `T`. 
-""" -function tear(x::T) where T <: Base.IEEEFloat - signbitmask, expobitmask, fracbitmask = masks(T) - ur = reinterpret(uint(T), x) - s = (ur & signbitmask) >> (exposize(T) + fracsize(T)) - e = (ur & expobitmask) >> fracsize(T) - f = (ur & fracbitmask) >> 0x0 - return (s, e, f) -end lerp(x,y,t) = y*t + x*(1-t) function smootherstep(a, b, t) diff --git a/test/runtests.jl b/test/runtests.jl index 3e343cc..2324e01 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -517,12 +517,12 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 @testset "Floating point encoding" begin @testset for T in (Float16, Float32, Float64) - iT = Supposition.uint(T) + iT = FloatEncoding.uint(T) # These invariants are ported from Hypothesis @testset "Exponent encoding" begin # The highest value here is 0x7ff (2047) with Int64, # so we can just exhaustively test all 2048 possibilities - exponents = zero(iT):Supposition.max_exponent(T) + exponents = zero(iT):FloatEncoding.max_exponent(T) # Round tripping @test all(exponents) do e @@ -568,7 +568,7 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 @check order_integral_part( Data.Floats{T}(; minimum=one(T), - maximum=T(2^(Supposition.fracsize(T) + 1)), + maximum=T(2^(FloatEncoding.fracsize(T) + 1)), nans=false), Data.Floats{T}(; minimum=nextfloat(zero(T)), maximum=prevfloat(one(T)), nans=false)) @@ -1369,7 +1369,7 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 @testset for T in (Float16, Float32, Float64) @check function floatfunc(f=Data.Floats{T}()) orig = bitstring(f) - reassembled = bitstring(Supposition.assemble(T, Supposition.tear(f)...)) + reassembled = bitstring(FloatEncoding.assemble(T, FloatEncoding.tear(f)...)) orig == reassembled end end From c1555cb7d512e09ce3609d5c8968bfd79ae0b652 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Fri, 19 Jul 2024 08:08:39 -0500 Subject: [PATCH 14/17] clean up and clarify docstrings for FloatEncoding Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- src/float.jl | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/float.jl b/src/float.jl index cee1052..5c3a538 100644 --- a/src/float.jl +++ b/src/float.jl @@ -1,23 +1,22 @@ """ FloatEncoding -This module includes utilities for maniuplating floating point numbers in an IEEE 754 encoding: +This module includes utilities for manipulating floating point numbers in IEEE 754 encoding, focusing on the standard Julia types `Float16`, `Float32` and `Float64`. Additionally, it implements an encoding for floating point numbers that has better shrinking properties, ported from hypothesis. See [`lex_to_float`](@ref) for more details. """ module FloatEncoding """ - uint(::Type{F}) + uint(::Type{F}) where F <: Base.IEEEFloat + uint(::F) where F <: Base.IEEEFloat -Returns the unsigned integer type that can hold the bit pattern of a floating point number of type `F`. +When given a type, returns the unsigned integer type `T` that can hold the bit pattern of a floating point number of type `F`. +When given an object, returns `zero(T)`. """ -uint(::Type{Float16}) = UInt16 -""" - uint(::F) +function uint end -Returns the zero value of the unsigned integer type that can hold the bit pattern of a floating point number of type `F`. 
-""" +uint(::Type{Float16}) = UInt16 uint(::Float16) = zero(UInt16) uint(::Type{Float32}) = UInt32 uint(::Float32) = zero(UInt32) @@ -46,13 +45,15 @@ exposize(::Type{Float64}) = 11 """ max_exponent(::Type{F}) -The maximum value of the exponent bits of a floating point number of type `F`. +The maximum value of the exponent bits of a floating point number of type `F`, given as a `uint(F)`. """ max_exponent(::Type{T}) where {T<:Base.IEEEFloat} = (1 << exposize(T) - 1) % uint(T) """ bias(::Type{F}) The IEEE 754 bias of the exponent bits of a floating point number of type `F`. + +See also [Wikipedia: Exponent bias](https://en.wikipedia.org/wiki/Exponent_bias). """ bias(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << (exposize(T) - 1) - 1) @@ -98,9 +99,11 @@ end """ exponent_key(T, e) -A lexicographical ordering for floating point exponents. The encoding is ported +A lexicographical ordering for floating point exponents `e`, for the given floating point type `T`. The encoding is ported from Hypothesis. +`e` is expected to be in the closed interval `[0, max_exponent(T)]`. + The ordering is * non-negative exponents in increasing order @@ -131,7 +134,7 @@ end _make_encoding_table(T) Build a look up table for encoding exponents of floating point numbers of type `T`. -For a floating point type `T`, the lookup table is a permutation of the unsigned integers of type [`uint`](@ref) from `0` to `max_exponent(T)`. +For a floating point type `T`, the lookup table is a permutation of the unsigned integers of type [`uint(T)`](@ref) from `0` to `max_exponent(T)`. This allows the reordering of the exponent bits of a floating point number according to the encoding described in [`exponent_key`](@ref). """ @@ -143,6 +146,8 @@ _make_encoding_table(T) = sort(zero(uint(T)):max_exponent(T); A dictionary mapping `Unsigned` types to encoding tables for exponents. The encoding is described in [`exponent_key`](@ref) and is ported from Hypothesis. +Currently, only `UInt16`, `UInt32` and `UInt64` are available. + # See Also [`encode_exponent`](@ref) @@ -186,6 +191,8 @@ end A dictionary mapping `Unsigned` types to decoding tables for exponents. The encoding is described in [`exponent_key`](@ref) and is ported from Hypothesis. +Currently, only `UInt16`, `UInt32` and `UInt64` are available. + # See Also [`decode_exponent`](@ref) @@ -246,8 +253,8 @@ end """ lex_to_float(T, bits) -Reinterpret the bits of a floating point number using an encoding with better shrinking -properties. +Convert the given `bits` of the lexicographical encoding to a floating point number of type `T`. +The bitwidth of `T` must match `sizeof(bits)`. This produces a non-negative floating point number, possibly including `NaN` or `Inf`. The encoding is ported from Hypothesis, and has the property that lexicographically smaller @@ -271,7 +278,6 @@ If the sign bit is not set: ## Extended Help See the [reference implementation](https://github.com/HypothesisWorks/hypothesis/blob/aad70fb2d9dec2cef9719cdf5369eec9fae0d2a4/hypothesis-python/src/hypothesis/internal/conjecture/floats.py#L176) in Hypothesis. - """ function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bitwidth of the given bits!")) @@ -296,11 +302,13 @@ Encoding a floating point number as a bit pattern. This is essentially the inverse of [`lex_to_float`](@ref) and produces a bit pattern that is lexicographically smaller for 'simpler' floats. 
-Note that while `lex_to_float` can produce any valid positive floating point number, it is not injective. So combined with the fact that positive and negative floats map to the same bit pattern, -`float_to_lex` is not an exact inverse of `lex_to_float`. +!!! note "Injectivity" + Note that while `lex_to_float` can produce any valid positive floating point number, it is not injective. + So combined with the fact that positive and negative floats map to the same bit pattern, `float_to_lex` + is not an exact inverse of `lex_to_float`. """ function float_to_lex(f::T) where {T<:Base.IEEEFloat} - # If the float is simple, we can just reinterpret it as an integer + # If the float is simple, we can just convert it to an integer # This corresponds to the latter branch of lex_to_float if is_simple_float(f) uint(T)(f) @@ -312,7 +320,7 @@ end """ is_simple_float(f) -`f` is simple if it is integral and the first byte is all zeros. +Return whether the given floating point number is considered "simple" in terms of the lexicographic encoding. """ function is_simple_float(f::T) where {T<:Base.IEEEFloat} if trunc(f) != f From 9f39ce8c27d5089277f221ee93c07f8e636a8e90 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Fri, 19 Jul 2024 08:10:10 -0500 Subject: [PATCH 15/17] refactor claculation of bias Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- src/float.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/float.jl b/src/float.jl index 5c3a538..be7a62d 100644 --- a/src/float.jl +++ b/src/float.jl @@ -55,7 +55,7 @@ The IEEE 754 bias of the exponent bits of a floating point number of type `F`. See also [Wikipedia: Exponent bias](https://en.wikipedia.org/wiki/Exponent_bias). """ -bias(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << (exposize(T) - 1) - 1) +bias(::Type{T}) where {T<:Base.IEEEFloat} = (1 << (exposize(T) - 1) - 1) % uint(T) function masks(::Type{T}) where {T<:Base.IEEEFloat} ui = uint(T) From add36a1c363e936bd159654a1b01c763941abb86 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Fri, 19 Jul 2024 08:53:44 -0500 Subject: [PATCH 16/17] refactor encoding tests for NaN edgecases The test was previously failing because NaNs were being generated with the signbit set, but lex_to_float only generates floats with an unset signbit. Refactored the tests to test for all "positive" NaNs, separately from other positive float. --- test/runtests.jl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 2324e01..71f3348 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -534,6 +534,8 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 end end + # check that the encoding roundtrips properly + # N.B. 
this property only holds for floats with the signbit unset function roundtrip_encoding(f) encoded = FloatEncoding.float_to_lex(f) decoded = FloatEncoding.lex_to_float(T, encoded) @@ -552,7 +554,16 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 @test roundtrip_encoding(f) end - @check roundtrip_encoding(Data.Floats{T}(; minimum=nextfloat(zero(T)))) + @check roundtrip_encoding(Data.Floats{T}(; minimum=nextfloat(zero(T)), nans=false)) + + # sample from all possible NaN bit patterns + # with the signbit unset + nan_gen = + map(bits -> + FloatEncoding.assemble(T, zero(iT), FloatEncoding.max_exponent(T), bits), + Data.Integers(zero(iT), (1 << FloatEncoding.fracsize(T) - 1) % iT)) + + @check roundtrip_encoding(nan_gen) @testset "Ordering" begin function order_integral_part(n, g) From e6942696572a975f03909a566dbf3408d8a4d81c Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Fri, 19 Jul 2024 09:36:05 -0500 Subject: [PATCH 17/17] simplify encoding testing for nans Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- test/runtests.jl | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 71f3348..a5270cb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -554,16 +554,12 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 @test roundtrip_encoding(f) end - @check roundtrip_encoding(Data.Floats{T}(; minimum=nextfloat(zero(T)), nans=false)) - - # sample from all possible NaN bit patterns - # with the signbit unset - nan_gen = - map(bits -> - FloatEncoding.assemble(T, zero(iT), FloatEncoding.max_exponent(T), bits), - Data.Integers(zero(iT), (1 << FloatEncoding.fracsize(T) - 1) % iT)) - - @check roundtrip_encoding(nan_gen) + # `roundtrip_encoding` assumes the signbit is unset + roundtrip_gen = map(Data.Floats{T}()) do f + _, exp, frac = FloatEncoding.tear(f) + FloatEncoding.assemble(T, zero(iT), exp, frac) + end + @check roundtrip_encoding(roundtrip_gen) @testset "Ordering" begin function order_integral_part(n, g)
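
Taken together, the series ports Hypothesis' lexicographic float encoding: the exponent permutation puts non-negative unbiased exponents first, then negative exponents in decreasing order, with the all-ones (Inf/NaN) exponent last, so that smaller bit patterns decode to simpler values. The core of that permutation can be reproduced standalone; the following is a simplified Float16-sized sketch mirroring `exponent_key` and `_make_encoding_table`, not the package code itself:

```julia
# Simplified, standalone sketch of the exponent reordering for Float16
# (mirrors `exponent_key` / `_make_encoding_table`; not the package code).
exposize = 5                          # Float16 has 5 exponent bits
bias     = (1 << (exposize - 1)) - 1  # 15
max_e    = (1 << exposize) - 1        # 31, the all-ones exponent (Inf/NaN)

# non-negative unbiased exponents first, then negatives in decreasing order,
# then the Inf/NaN exponent last
key(e) = e == max_e ? Inf : (e < bias ? bias - (e - bias) : e - bias)
table = sort(0:max_e; by = key)

@assert table[1:4] == [15, 16, 17, 18]  # unbiased exponents 0, 1, 2, 3 come first
@assert table[end] == 31                # the Inf/NaN exponent shrinks away last
```

The same table, built per type and inverted for decoding, is what `ENCODING_TABLE`/`DECODING_TABLE` cache in `src/float.jl`, which is why shrinking drives floats towards small integral values near 1.0 before ever reaching subnormals, infinities, or NaN.
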