From 08a170d3bdb961728b1e10e3a4d04ae7f9d2a103 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Tue, 16 Jul 2024 11:02:30 -0500 Subject: [PATCH 01/17] generate floats using lexographically ordered encoding This uses the same encoding as `hypothesis` to give floats better shrinking properties. --- src/data.jl | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++- src/util.jl | 3 ++ 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/src/data.jl b/src/data.jl index 39a3121..ae352b9 100644 --- a/src/data.jl +++ b/src/data.jl @@ -39,7 +39,7 @@ as well as these utility functions: module Data using Supposition -using Supposition: smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject +using Supposition: smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject, max_exponent, bias, assemble, tear, exposize, fracsize using RequiredInterfaces: @required using StyledStrings: @styled_str using Printf: format, @format_str @@ -1449,9 +1449,118 @@ function Base.show(io::IO, ::MIME"text/plain", f::Floats) E.g. {code:$obj}; {code:isinf}: $inf, {code:isnan}: $nan""") end +""" + exponent_key(T, e) + +A lexographical ordering for floating point exponents. The encoding is taken +from hypothesis. +The ordering is +- non-negative exponents in increasing order +- negative exponents in decreasing order +- the maximum exponent +""" +function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} + if e == max_exponent(T) + return Inf + end + unbiased = float(e) - bias(T) + if unbiased < 0 + 10000 - unbiased + else + unbiased + end +end + +_make_encoding_table(T) = sort( + zero(Supposition.uint(T)):max_exponent(T), + by=x -> exponent_key(T, x)) +const ENCODING_TABLE = Dict( + UInt16 => _make_encoding_table(Float16), + UInt32 => _make_encoding_table(Float32), + UInt64 => _make_encoding_table(Float64)) + + +""" + update_mantissa(exponent, mantissa) + +Encode the mantissa of a floating point number using an encoding with better shrinking. + +""" +function update_mantissa(::Type{T}, exponent::iT, mantissa::iT)::iT where {T<:Base.IEEEFloat,iT<:Unsigned} + @assert Supposition.uint(T) == iT + # The unbiased exponent is <= 0 + if exponent <= bias(T) + # reverse the bits of the mantissa in place + bitreverse(mantissa) >> (exposize(T) + 1) + elseif exponent >= fracsize(T) + bias(T) + mantissa + else + # reverse the low bits of the fractional part + # as determined by the exponent + n_reverse_bits = fracsize(T) + bias(T) - exponent + # isolate the bits to be reversed + to_reverse = mantissa & iT((1 << n_reverse_bits) - 1) + # zero them out + mantissa = mantissa ⊻ to_reverse + # reverse them and put them back in place + mantissa |= bitreverse(to_reverse) >> (8 * sizeof(T) - n_reverse_bits) + end +end + + +""" + lexographical_float(T, bits) + +Reinterpret the bits of a floating point number using an encoding with better shrinking +properties. +This produces a non-negative floating point number, possibly including NaN or Inf. + +The encoding is taken from hypothesis, and has the property that lexicographically smaller +bit patterns corespond to 'simpler' floats. 
+ +# Encoding + +The encoding used is as follows: + +If the sign bit is set: + + - the remainder of the first byte is ignored + - the remaining bytes are interpreted as an integer and converted to a float + +If the sign bit is not set: + + - the exponent is decoded using `decode_exponent` + - the mantissa is updated using `update_mantissa` + - the float is reassembled using `assemble` + +""" +function lexographical_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} + sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bidwidth of `I`!")) + iT = Supposition.uint(T) + sign, exponent, mantissa = tear(reinterpret(T, bits)) + if sign == 1 + exponent = ENCODING_TABLE[iT][exponent+1] + mantissa = update_mantissa(T, exponent, mantissa) + assemble(T, zero(iT), exponent, mantissa) + else + integral_mask = iT((1 << (8 * (sizeof(T) - 1))) - 1) + integral_part = bits & integral_mask + T(integral_part) + end +end + function produce!(tc::TestCase, f::Floats{T}) where {T} iT = Supposition.uint(T) - res = reinterpret(T, produce!(tc, Integers{iT}())) + + bits = produce!(tc, Integers{iT}()) + + is_negative = produce!(tc, Booleans()) + + res = lexographical_float(T, bits) + if is_negative + res = -res + end + # early rejections !f.infs && isinf(res) && reject(tc) !f.nans && isnan(res) && reject(tc) diff --git a/src/util.jl b/src/util.jl index 86012a2..1e1402f 100644 --- a/src/util.jl +++ b/src/util.jl @@ -26,6 +26,9 @@ exposize(::Type{Float16}) = 5 exposize(::Type{Float32}) = 8 exposize(::Type{Float64}) = 11 +max_exponent(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << exposize(T) - 1) +bias(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << (exposize(T) - 1) - 1) + function masks(::Type{T}) where T <: Base.IEEEFloat ui = uint(T) signbitmask = one(ui) << (8*sizeof(ui)-1) From 1df83c0c1374c9b51e0923258506125c7a200fd3 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Wed, 17 Jul 2024 09:26:26 -0500 Subject: [PATCH 02/17] add tests for encoding of float exponent --- src/Supposition.jl | 1 + src/data.jl | 102 +------------------------------ src/float.jl | 147 +++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 21 ++++++- 4 files changed, 170 insertions(+), 101 deletions(-) create mode 100644 src/float.jl diff --git a/src/Supposition.jl b/src/Supposition.jl index a5ae9b7..012dca2 100644 --- a/src/Supposition.jl +++ b/src/Supposition.jl @@ -35,6 +35,7 @@ using StyledStrings include("types.jl") include("testcase.jl") include("util.jl") +include("float.jl") include("data.jl") include("teststate.jl") include("shrink.jl") diff --git a/src/data.jl b/src/data.jl index ae352b9..ba3fce0 100644 --- a/src/data.jl +++ b/src/data.jl @@ -39,7 +39,8 @@ as well as these utility functions: module Data using Supposition -using Supposition: smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject, max_exponent, bias, assemble, tear, exposize, fracsize +using Supposition: smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject +using Supposition.FloatEncoding: lexographical_float using RequiredInterfaces: @required using StyledStrings: @styled_str using Printf: format, @format_str @@ -1449,105 +1450,6 @@ function Base.show(io::IO, ::MIME"text/plain", f::Floats) E.g. {code:$obj}; {code:isinf}: $inf, {code:isnan}: $nan""") end -""" - exponent_key(T, e) - -A lexographical ordering for floating point exponents. The encoding is taken -from hypothesis. 
-The ordering is -- non-negative exponents in increasing order -- negative exponents in decreasing order -- the maximum exponent -""" -function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} - if e == max_exponent(T) - return Inf - end - unbiased = float(e) - bias(T) - if unbiased < 0 - 10000 - unbiased - else - unbiased - end -end - -_make_encoding_table(T) = sort( - zero(Supposition.uint(T)):max_exponent(T), - by=x -> exponent_key(T, x)) -const ENCODING_TABLE = Dict( - UInt16 => _make_encoding_table(Float16), - UInt32 => _make_encoding_table(Float32), - UInt64 => _make_encoding_table(Float64)) - - -""" - update_mantissa(exponent, mantissa) - -Encode the mantissa of a floating point number using an encoding with better shrinking. - -""" -function update_mantissa(::Type{T}, exponent::iT, mantissa::iT)::iT where {T<:Base.IEEEFloat,iT<:Unsigned} - @assert Supposition.uint(T) == iT - # The unbiased exponent is <= 0 - if exponent <= bias(T) - # reverse the bits of the mantissa in place - bitreverse(mantissa) >> (exposize(T) + 1) - elseif exponent >= fracsize(T) + bias(T) - mantissa - else - # reverse the low bits of the fractional part - # as determined by the exponent - n_reverse_bits = fracsize(T) + bias(T) - exponent - # isolate the bits to be reversed - to_reverse = mantissa & iT((1 << n_reverse_bits) - 1) - # zero them out - mantissa = mantissa ⊻ to_reverse - # reverse them and put them back in place - mantissa |= bitreverse(to_reverse) >> (8 * sizeof(T) - n_reverse_bits) - end -end - - -""" - lexographical_float(T, bits) - -Reinterpret the bits of a floating point number using an encoding with better shrinking -properties. -This produces a non-negative floating point number, possibly including NaN or Inf. - -The encoding is taken from hypothesis, and has the property that lexicographically smaller -bit patterns corespond to 'simpler' floats. - -# Encoding - -The encoding used is as follows: - -If the sign bit is set: - - - the remainder of the first byte is ignored - - the remaining bytes are interpreted as an integer and converted to a float - -If the sign bit is not set: - - - the exponent is decoded using `decode_exponent` - - the mantissa is updated using `update_mantissa` - - the float is reassembled using `assemble` - -""" -function lexographical_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} - sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bidwidth of `I`!")) - iT = Supposition.uint(T) - sign, exponent, mantissa = tear(reinterpret(T, bits)) - if sign == 1 - exponent = ENCODING_TABLE[iT][exponent+1] - mantissa = update_mantissa(T, exponent, mantissa) - assemble(T, zero(iT), exponent, mantissa) - else - integral_mask = iT((1 << (8 * (sizeof(T) - 1))) - 1) - integral_part = bits & integral_mask - T(integral_part) - end -end function produce!(tc::TestCase, f::Floats{T}) where {T} iT = Supposition.uint(T) diff --git a/src/float.jl b/src/float.jl new file mode 100644 index 0000000..8704e64 --- /dev/null +++ b/src/float.jl @@ -0,0 +1,147 @@ +module FloatEncoding +using Supposition: uint, tear, bias, fracsize, exposize, max_exponent, assemble + +""" + exponent_key(T, e) + +A lexographical ordering for floating point exponents. The encoding is taken +from hypothesis. 
+The ordering is +- non-negative exponents in increasing order +- negative exponents in decreasing order +- the maximum exponent +""" +function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} + if e == max_exponent(T) + return Inf + end + unbiased = float(e) - bias(T) + if unbiased < 0 + 10000 - unbiased + else + unbiased + end +end + +_make_encoding_table(T) = sort( + zero(uint(T)):max_exponent(T), + by=Base.Fix1(exponent_key, T)) +const ENCODING_TABLE = Dict( + UInt16 => _make_encoding_table(Float16), + UInt32 => _make_encoding_table(Float32), + UInt64 => _make_encoding_table(Float64)) + +encode_exponent(e::T) where {T<:Unsigned} = ENCODING_TABLE[T][e+1] + +function _make_decoding_table(T) + decoding_table = zeros(uint(T), max_exponent(T) + 1) + for (i, e) in enumerate(ENCODING_TABLE[uint(T)]) + decoding_table[e+1] = i - 1 + end + decoding_table +end +const DECODING_TABLE = Dict( + UInt16 => _make_decoding_table(Float16), + UInt32 => _make_decoding_table(Float32), + UInt64 => _make_decoding_table(Float64)) +decode_exponent(e::T) where {T<:Unsigned} = DECODING_TABLE[T][e+1] + + +""" + update_mantissa(exponent, mantissa) + +Encode the mantissa of a floating point number using an encoding with better shrinking. +""" +function update_mantissa(::Type{T}, exponent::iT, mantissa::iT)::iT where {T<:Base.IEEEFloat,iT<:Unsigned} + @assert uint(T) == iT + # The unbiased exponent is <= 0 + if exponent <= bias(T) + # reverse the bits of the mantissa in place + bitreverse(mantissa) >> (exposize(T) + 1) + elseif exponent >= fracsize(T) + bias(T) + mantissa + else + # reverse the low bits of the fractional part + # as determined by the exponent + n_reverse_bits = fracsize(T) + bias(T) - exponent + # isolate the bits to be reversed + to_reverse = mantissa & iT((1 << n_reverse_bits) - 1) + # zero them out + mantissa = mantissa ⊻ to_reverse + # reverse them and put them back in place + mantissa |= bitreverse(to_reverse) >> (8 * sizeof(T) - n_reverse_bits) + end +end + + +""" + lexographical_float(T, bits) + +Reinterpret the bits of a floating point number using an encoding with better shrinking +properties. +This produces a non-negative floating point number, possibly including NaN or Inf. + +The encoding is taken from hypothesis, and has the property that lexicographically smaller +bit patterns corespond to 'simpler' floats. 
+ +# Encoding + +The encoding used is as follows: + +If the sign bit is set: + + - the remainder of the first byte is ignored + - the remaining bytes are interpreted as an integer and converted to a float + +If the sign bit is not set: + + - the exponent is decoded using `decode_exponent` + - the mantissa is updated using `update_mantissa` + - the float is reassembled using `assemble` + +""" +function lexographical_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} + sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bidwidth of `I`!")) + iT = uint(T) + sign, exponent, mantissa = tear(reinterpret(T, bits)) + if sign == 1 + exponent = encode_exponent(exponent) + mantissa = update_mantissa(T, exponent, mantissa) + assemble(T, zero(iT), exponent, mantissa) + else + integral_mask = iT((1 << (8 * (sizeof(T) - 1))) - 1) + integral_part = bits & integral_mask + T(integral_part) + end +end + +function float_to_lex(f::T) where {T<:Base.IEEEFloat} + if is_simple_float(f) + uint(T)(f) + else + base_float_to_lex(f) + end +end + +function is_simple_float(f::T) where {T<:Base.IEEEFloat} + try + if trunc(f) != f + return false + end + Base.top_set_bit(uint(f)) <= 8 * (sizeof(T) - 1) + catch e + if isa(e, InexactError) + return false + end + rethrow(e) + end +end + +function base_float_to_lex(f::T) where {T<:Base.IEEEFloat} + sign, exponent, mantissa = tear(f) + mantissa = update_mantissa(T, exponent, mantissa) + exponent = decode_exponent(exponent) + + reinterpret(uint(T), assemble(T, sign, exponent, mantissa)) +end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 166c92a..44f10ca 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,5 @@ using Supposition -using Supposition: Data, test_function, shrink_remove, shrink_redistribute, +using Supposition: Data, FloatEncoding, test_function, shrink_remove, shrink_redistribute, NoRecordDB, UnsetDB, Attempt, DEFAULT_CONFIG, TestCase, TestState, choice!, weighted! using Test using Aqua @@ -513,6 +513,25 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 @test_throws ArgumentError Data.Floats(;minimum=2.0, maximum=1.0) end + # Tests the properties of the enocding used to represent floating point numbers + @testset "Floating point encoding" begin + @testset for T in (Float16, Float32, Float64) + # These invariants are ported from Hypothesis + @testset "Exponent encoding" begin + exponents = zero(Supposition.uint(T)):Supposition.max_exponent(T) + + # Round tripping + @test all(exponents) do e + FloatEncoding.decode_exponent(FloatEncoding.encode_exponent(e)) == e + end + + @test all(exponents) do e + FloatEncoding.encode_exponent(FloatEncoding.decode_exponent(e)) == e + end + end + end + end + @testset "@check API" begin # These tests are for accepted syntax, not functionality, so only one example is fine API_conf = Supposition.merge(DEFAULT_CONFIG[]; verbose=verb, max_examples=1) From e58ac23889509e37731d26ca8600d25dc4b308ab Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Wed, 17 Jul 2024 12:24:24 -0500 Subject: [PATCH 03/17] port float encoding tests from hypothesis This just ports the tests that check the ordering properties of the encoding and fixes the bugs that were found by the tests. 
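
The properties being ported come down to one invariant: "simpler" floats (small non-negative integers first, then larger and fractional values) should map to lexicographically smaller bit patterns. A minimal sketch of the kind of invariant these tests assert, using the internal `FloatEncoding` module added in this series (illustrative only, not part of the committed test suite):

```julia
# Illustrative sketch of the ordering invariants checked by the ported tests.
using Supposition: FloatEncoding

# Integral floats should order the same way as the integers they represent ...
xs = Float64.(0:10)
@assert issorted(FloatEncoding.float_to_lex.(xs))

# ... and a fractional float is "less simple" than 1.0, so it encodes larger.
@assert FloatEncoding.float_to_lex(0.5) > FloatEncoding.float_to_lex(1.0)
```
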
--- src/data.jl | 4 ++-- src/float.jl | 8 +++---- test/runtests.jl | 58 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 63 insertions(+), 7 deletions(-) diff --git a/src/data.jl b/src/data.jl index ba3fce0..3fce06f 100644 --- a/src/data.jl +++ b/src/data.jl @@ -40,7 +40,7 @@ module Data using Supposition using Supposition: smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject -using Supposition.FloatEncoding: lexographical_float +using Supposition.FloatEncoding: lex_to_float using RequiredInterfaces: @required using StyledStrings: @styled_str using Printf: format, @format_str @@ -1458,7 +1458,7 @@ function produce!(tc::TestCase, f::Floats{T}) where {T} is_negative = produce!(tc, Booleans()) - res = lexographical_float(T, bits) + res = lex_to_float(T, bits) if is_negative res = -res end diff --git a/src/float.jl b/src/float.jl index 8704e64..a8e6eaa 100644 --- a/src/float.jl +++ b/src/float.jl @@ -75,7 +75,7 @@ end """ - lexographical_float(T, bits) + lex_to_float(T, bits) Reinterpret the bits of a floating point number using an encoding with better shrinking properties. @@ -100,7 +100,7 @@ If the sign bit is not set: - the float is reassembled using `assemble` """ -function lexographical_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} +function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bidwidth of `I`!")) iT = uint(T) sign, exponent, mantissa = tear(reinterpret(T, bits)) @@ -128,7 +128,7 @@ function is_simple_float(f::T) where {T<:Base.IEEEFloat} if trunc(f) != f return false end - Base.top_set_bit(uint(f)) <= 8 * (sizeof(T) - 1) + Base.top_set_bit(reinterpret(uint(T), f)) <= 8 * (sizeof(T) - 1) catch e if isa(e, InexactError) return false @@ -142,6 +142,6 @@ function base_float_to_lex(f::T) where {T<:Base.IEEEFloat} mantissa = update_mantissa(T, exponent, mantissa) exponent = decode_exponent(exponent) - reinterpret(uint(T), assemble(T, sign, exponent, mantissa)) + reinterpret(uint(T), assemble(T, one(uint(T)), exponent, mantissa)) end end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 44f10ca..8427069 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -516,9 +516,11 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 # Tests the properties of the enocding used to represent floating point numbers @testset "Floating point encoding" begin @testset for T in (Float16, Float32, Float64) + + iT = Supposition.uint(T) # These invariants are ported from Hypothesis @testset "Exponent encoding" begin - exponents = zero(Supposition.uint(T)):Supposition.max_exponent(T) + exponents = zero(iT):Supposition.max_exponent(T) # Round tripping @test all(exponents) do e @@ -529,6 +531,60 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 FloatEncoding.encode_exponent(FloatEncoding.decode_exponent(e)) == e end end + + function roundtrip_encoding(f) + assume!(!signbit(f)) + encoded = FloatEncoding.float_to_lex(f) + decoded = FloatEncoding.lex_to_float(T, encoded) + reinterpret(iT, decoded) == reinterpret(iT, f) + end + + roundtrip_examples = map(Data.Just, + T[ + 0.0, + 2.5, + 8.000000000000007, + 3.0, + 2.0, + 1.9999999999999998, + 1.0 + ]) + @check roundtrip_encoding(Data.OneOf(roundtrip_examples...)) + @check roundtrip_encoding(Data.Floats{T}(; minimum=zero(T))) + + @testset "Ordering" begin + function order_integral_part(n, g) + f = n + g + assume!(trunc(f) != f) + assume!(trunc(f) != 0) + i = 
FloatEncoding.float_to_lex(f) + g = trunc(f) + FloatEncoding.float_to_lex(g) < i + end + + @check order_integral_part(Data.Just(1.0), Data.Just(0.5)) + @check order_integral_part( + Data.Floats{T}(; + minimum=one(T), + maximum=T(2^(Supposition.fracsize(T) + 1)), + nans=false), + filter(x -> !(x in T[0, 1]), + Data.Floats{T}(; minimum=zero(T), maximum=one(T), nans=false))) + + integral_float_gen = map(abs ∘ trunc, + Data.Floats{T}(; minimum=zero(T), infs=false, nans=false)) + + @check function integral_floats_order_as_integers(x=integral_float_gen, + y=integral_float_gen) + (x < y) == (FloatEncoding.float_to_lex(x) < FloatEncoding.float_to_lex(y)) + end + + @check function fractional_floats_greater_than_1( + f=Data.Floats{T}(; minimum=zero(T), maximum=one(T), nans=false)) + assume!(0 < f < 1) + FloatEncoding.float_to_lex(f) > FloatEncoding.float_to_lex(one(T)) + end + end end end From 4ecb381f0bdc5261eaa1670f873280a79bbadeeb Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Wed, 17 Jul 2024 13:46:35 -0500 Subject: [PATCH 04/17] replace top_set_bit to be compatible with older julia --- src/float.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/float.jl b/src/float.jl index a8e6eaa..2f6e9b5 100644 --- a/src/float.jl +++ b/src/float.jl @@ -128,7 +128,7 @@ function is_simple_float(f::T) where {T<:Base.IEEEFloat} if trunc(f) != f return false end - Base.top_set_bit(reinterpret(uint(T), f)) <= 8 * (sizeof(T) - 1) + ndigits(reinterpret(uint(T), f), base=2) <= 8 * (sizeof(T) - 1) catch e if isa(e, InexactError) return false @@ -138,7 +138,7 @@ function is_simple_float(f::T) where {T<:Base.IEEEFloat} end function base_float_to_lex(f::T) where {T<:Base.IEEEFloat} - sign, exponent, mantissa = tear(f) + _, exponent, mantissa = tear(f) mantissa = update_mantissa(T, exponent, mantissa) exponent = decode_exponent(exponent) From 822574efad146e71e70245521ffc1e7e99a82458 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 09:51:42 -0500 Subject: [PATCH 05/17] docstring and formatting changes from code review Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- src/float.jl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/float.jl b/src/float.jl index 2f6e9b5..9fd5951 100644 --- a/src/float.jl +++ b/src/float.jl @@ -23,9 +23,8 @@ function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} end end -_make_encoding_table(T) = sort( - zero(uint(T)):max_exponent(T), - by=Base.Fix1(exponent_key, T)) +_make_encoding_table(T) = sort(zero(uint(T)):max_exponent(T); + by = Base.Fix1(exponent_key, T)) const ENCODING_TABLE = Dict( UInt16 => _make_encoding_table(Float16), UInt32 => _make_encoding_table(Float32), @@ -79,10 +78,10 @@ end Reinterpret the bits of a floating point number using an encoding with better shrinking properties. -This produces a non-negative floating point number, possibly including NaN or Inf. +This produces a non-negative floating point number, possibly including `NaN` or `Inf`. -The encoding is taken from hypothesis, and has the property that lexicographically smaller -bit patterns corespond to 'simpler' floats. +The encoding is ported from hypothesis, and has the property that lexicographically smaller +bit patterns correspond to 'simpler' floats. 
# Encoding @@ -101,7 +100,7 @@ If the sign bit is not set: """ function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} - sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bidwidth of `I`!")) + sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bitwidth of the given bits!")) iT = uint(T) sign, exponent, mantissa = tear(reinterpret(T, bits)) if sign == 1 From fb8907cda1d2cb502e92ca974729170c9cadd491 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 09:55:16 -0500 Subject: [PATCH 06/17] optimizations from code review Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- src/float.jl | 7 ++++--- src/util.jl | 2 +- test/runtests.jl | 3 +-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/float.jl b/src/float.jl index 9fd5951..d771199 100644 --- a/src/float.jl +++ b/src/float.jl @@ -103,12 +103,12 @@ function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bitwidth of the given bits!")) iT = uint(T) sign, exponent, mantissa = tear(reinterpret(T, bits)) - if sign == 1 + if isone(sign) exponent = encode_exponent(exponent) mantissa = update_mantissa(T, exponent, mantissa) assemble(T, zero(iT), exponent, mantissa) else - integral_mask = iT((1 << (8 * (sizeof(T) - 1))) - 1) + integral_mask = iT(-1) >>> 0x8 integral_part = bits & integral_mask T(integral_part) end @@ -127,7 +127,8 @@ function is_simple_float(f::T) where {T<:Base.IEEEFloat} if trunc(f) != f return false end - ndigits(reinterpret(uint(T), f), base=2) <= 8 * (sizeof(T) - 1) + # In the encoding, the float is simple if the first byte is all zeros + leading_zeros(reinterpret(uint(T), f)) >= 8 catch e if isa(e, InexactError) return false diff --git a/src/util.jl b/src/util.jl index 1e1402f..768e32b 100644 --- a/src/util.jl +++ b/src/util.jl @@ -26,7 +26,7 @@ exposize(::Type{Float16}) = 5 exposize(::Type{Float32}) = 8 exposize(::Type{Float64}) = 11 -max_exponent(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << exposize(T) - 1) +max_exponent(::Type{T}) where {T<:Base.IEEEFloat} = (1 << exposize(T) - 1) % uint(T) bias(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << (exposize(T) - 1) - 1) function masks(::Type{T}) where T <: Base.IEEEFloat diff --git a/test/runtests.jl b/test/runtests.jl index 8427069..51ed347 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -568,8 +568,7 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 minimum=one(T), maximum=T(2^(Supposition.fracsize(T) + 1)), nans=false), - filter(x -> !(x in T[0, 1]), - Data.Floats{T}(; minimum=zero(T), maximum=one(T), nans=false))) + Data.Floats{T}(; minimum=nextfloat(zero(T)), maximum=prevfloat(one(T)), nans=false)) integral_float_gen = map(abs ∘ trunc, Data.Floats{T}(; minimum=zero(T), infs=false, nans=false)) From fb2b833fb5e63e4beec3f548a381a0763ac15891 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 09:55:44 -0500 Subject: [PATCH 07/17] improve formatting of docstring Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- src/float.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/float.jl b/src/float.jl index d771199..5135346 100644 --- a/src/float.jl +++ b/src/float.jl @@ -4,12 +4,13 @@ using Supposition: uint, tear, bias, fracsize, exposize, max_exponent, assemble """ exponent_key(T, e) -A lexographical ordering for floating point exponents. 
The encoding is taken +A lexicographical ordering for floating point exponents. The encoding is ported from hypothesis. + The ordering is -- non-negative exponents in increasing order -- negative exponents in decreasing order -- the maximum exponent + * non-negative exponents in increasing order + * negative exponents in decreasing order + * the maximum exponent """ function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} if e == max_exponent(T) From d1ece54745744cc3b081deb7925005c94e42c9c6 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 10:06:59 -0500 Subject: [PATCH 08/17] explain exponent encoding tests Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- test/runtests.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 51ed347..8e36ab9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -520,6 +520,8 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 iT = Supposition.uint(T) # These invariants are ported from Hypothesis @testset "Exponent encoding" begin + # The highest value here is 0x7ff (2047) with Int64, + # so we can just exhaustively test all 2048 possibilities exponents = zero(iT):Supposition.max_exponent(T) # Round tripping From 9bfb8f53a235360952f5e7ee51f1a6068d3a1daf Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 10:44:30 -0500 Subject: [PATCH 09/17] fix calculation of integral_mask --- src/float.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/float.jl b/src/float.jl index 5135346..9d223be 100644 --- a/src/float.jl +++ b/src/float.jl @@ -109,7 +109,7 @@ function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} mantissa = update_mantissa(T, exponent, mantissa) assemble(T, zero(iT), exponent, mantissa) else - integral_mask = iT(-1) >>> 0x8 + integral_mask = signed(iT)(-1) >>> 0x8 integral_part = bits & integral_mask T(integral_part) end From c207dc9dc00f7c70a62605960792c1dce3a3120b Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 11:22:53 -0500 Subject: [PATCH 10/17] add documentation for float encodings --- src/float.jl | 147 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 128 insertions(+), 19 deletions(-) diff --git a/src/float.jl b/src/float.jl index 9d223be..ae22798 100644 --- a/src/float.jl +++ b/src/float.jl @@ -5,12 +5,17 @@ using Supposition: uint, tear, bias, fracsize, exposize, max_exponent, assemble exponent_key(T, e) A lexicographical ordering for floating point exponents. The encoding is ported -from hypothesis. +from Hypothesis. The ordering is + * non-negative exponents in increasing order * negative exponents in decreasing order * the maximum exponent + +# Extended Help + +The [reference implementation](https://github.com/HypothesisWorks/hypothesis/blob/aad70fb2d9dec2cef9719cdf5369eec9fae0d2a4/hypothesis-python/src/hypothesis/internal/conjecture/floats.py#L82) in Hypothesis. 
""" function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} if e == max_exponent(T) @@ -18,21 +23,61 @@ function exponent_key(::Type{T}, e::iT) where {T<:Base.IEEEFloat,iT<:Unsigned} end unbiased = float(e) - bias(T) if unbiased < 0 - 10000 - unbiased + # order all negative exponents after the positive ones + # in reverse order + # max_exponent(T) - 1 maps to the key bias(T) + # so the first negative exponent maps to bias(T) + 1 + bias(T) - unbiased else unbiased end end +""" + _make_encoding_table(T) + +Build a look up table for encoding exponents of floating point numbers of type `T`. +For a floating point type `T`, the lookup table is a permutation of the unsigned integers of type [`uint`](@ref) from `0` to `max_exponent(T)`. + +This allows the reordering of the exponent bits of a floating point number according to the encoding described in [`exponent_key`](@ref). +""" _make_encoding_table(T) = sort(zero(uint(T)):max_exponent(T); by = Base.Fix1(exponent_key, T)) +""" + ENCODING_TABLE + +A dictionary mapping `Unsigned` types to encoding tables for exponents. +The encoding is described in [`exponent_key`](@ref) and is ported from Hypothesis. + +# See Also + +[`encode_exponent`](@ref) +[`DECODING_TABLE`](@ref) +""" const ENCODING_TABLE = Dict( UInt16 => _make_encoding_table(Float16), UInt32 => _make_encoding_table(Float32), UInt64 => _make_encoding_table(Float64)) +""" + encode_exponent(e) + +Encode the exponent of a floating point number using an encoding with better shrinking. +The exponent can be extracted from a floating point number `f` using [`tear`](@ref). + +# See Also + +[`ENCODING_TABLE`](@ref) +[`exponent_key`](@ref) +[`tear`](@ref) +""" encode_exponent(e::T) where {T<:Unsigned} = ENCODING_TABLE[T][e+1] +""" + _make_decoding_table(T) + +Build a look up table for decoding exponents of floating point numbers of type `T` which is the inverse of the table built by [`_make_encoding_table`](@ref). +""" function _make_decoding_table(T) decoding_table = zeros(uint(T), max_exponent(T) + 1) for (i, e) in enumerate(ENCODING_TABLE[uint(T)]) @@ -40,10 +85,28 @@ function _make_decoding_table(T) end decoding_table end + +""" + DECODING_TABLE + +A dictionary mapping `Unsigned` types to decoding tables for exponents. +The encoding is described in [`exponent_key`](@ref) and is ported from Hypothesis. + +# See Also + +[`decode_exponent`](@ref) +[`ENCODING_TABLE`](@ref) +""" const DECODING_TABLE = Dict( UInt16 => _make_decoding_table(Float16), UInt32 => _make_decoding_table(Float32), UInt64 => _make_decoding_table(Float64)) + +""" + decode_exponent(e) + +Undoes the encoding of the exponent of a floating point number used by [`encode_exponent`](@ref). +""" decode_exponent(e::T) where {T<:Unsigned} = DECODING_TABLE[T][e+1] @@ -51,6 +114,17 @@ decode_exponent(e::T) where {T<:Unsigned} = DECODING_TABLE[T][e+1] update_mantissa(exponent, mantissa) Encode the mantissa of a floating point number using an encoding with better shrinking. +The encoding is ported from Hypothesis. 
+ +The encoding is as follows: + + * If the unbiased exponent is <= 0, reverse the bits of the mantissa + * If the unbiased exponent is >= fracsize(T) + bias(T), do nothing + * Otherwise, reverse the low bits of the fractional part + +# Extended help + +See the [reference implementation](https://github.com/HypothesisWorks/hypothesis/blob/aad70fb2d9dec2cef9719cdf5369eec9fae0d2a4/hypothesis-python/src/hypothesis/internal/conjecture/floats.py#L165) in hypothesis """ function update_mantissa(::Type{T}, exponent::iT, mantissa::iT)::iT where {T<:Base.IEEEFloat,iT<:Unsigned} @assert uint(T) == iT @@ -81,7 +155,7 @@ Reinterpret the bits of a floating point number using an encoding with better sh properties. This produces a non-negative floating point number, possibly including `NaN` or `Inf`. -The encoding is ported from hypothesis, and has the property that lexicographically smaller +The encoding is ported from Hypothesis, and has the property that lexicographically smaller bit patterns correspond to 'simpler' floats. # Encoding @@ -95,9 +169,13 @@ If the sign bit is set: If the sign bit is not set: - - the exponent is decoded using `decode_exponent` - - the mantissa is updated using `update_mantissa` - - the float is reassembled using `assemble` + - the exponent is encoded using [`encoded_exponent`](@ref) + - the mantissa is updated using [`update_mantissa`](@ref) + - the float is reassembled using [`assemble`](@ref) + +## Extended Help + +See the [reference implementation](https://github.com/HypothesisWorks/hypothesis/blob/aad70fb2d9dec2cef9719cdf5369eec9fae0d2a4/hypothesis-python/src/hypothesis/internal/conjecture/floats.py#L176) in Hypothesis. """ function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} @@ -115,30 +193,61 @@ function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} end end +""" + float_to_lex(f) + +Encoding a floating point number as a bit pattern. + +This is essentially the inverse of [`lex_to_float`](@ref) and produces a bit pattern that is lexicographically +smaller for 'simpler' floats. + +Note that while `lex_to_float` can produce any valid positive floating point number, it is not injective. So combined with the fact that positive and negative floats map to the same bit pattern, +`float_to_lex` is not an exact inverse of `lex_to_float`. +""" function float_to_lex(f::T) where {T<:Base.IEEEFloat} + # If the float is simple, we can just reinterpret it as an integer + # This corresponds to the latter branch of lex_to_float if is_simple_float(f) uint(T)(f) else - base_float_to_lex(f) + nonsimple_float_to_lex(f) end end +""" + is_simple_float(f) + +`f` is simple if it is integral and the first byte is all zeros. +""" function is_simple_float(f::T) where {T<:Base.IEEEFloat} - try - if trunc(f) != f - return false - end - # In the encoding, the float is simple if the first byte is all zeros - leading_zeros(reinterpret(uint(T), f)) >= 8 - catch e - if isa(e, InexactError) - return false - end - rethrow(e) + if trunc(f) != f + return false end + # In the encoding, the float is simple if the first byte is all zeros + leading_zeros(reinterpret(uint(T), f)) >= 8 end -function base_float_to_lex(f::T) where {T<:Base.IEEEFloat} +""" + nonsimple_float_to_lex(f) + +Encode a floating point number as a bit pattern, when the float is not simple. 
+ +This is the inverse of [`lex_to_float`](@ref) for bit patterns with the signbit set i.e., + +```jldoctest +julia> using Supposition.FloatEncoding: lex_to_float, nonsimple_float_to_lex + +julia> bits = 0xff00 +0xff00 + +julia> signbit(reinterpret(Float16, bits)) +true + +julia> nonsimple_float_to_lex(lex_to_float(Float16, bits)) == bits +true +``` +""" +function nonsimple_float_to_lex(f::T) where {T<:Base.IEEEFloat} _, exponent, mantissa = tear(f) mantissa = update_mantissa(T, exponent, mantissa) exponent = decode_exponent(exponent) From c8462e4e574b6056e580f64f41ba7cdcc3f783e6 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 11:48:50 -0500 Subject: [PATCH 11/17] replace assertion in update_mantissa --- src/float.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/float.jl b/src/float.jl index ae22798..638b6b3 100644 --- a/src/float.jl +++ b/src/float.jl @@ -127,7 +127,8 @@ The encoding is as follows: See the [reference implementation](https://github.com/HypothesisWorks/hypothesis/blob/aad70fb2d9dec2cef9719cdf5369eec9fae0d2a4/hypothesis-python/src/hypothesis/internal/conjecture/floats.py#L165) in hypothesis """ function update_mantissa(::Type{T}, exponent::iT, mantissa::iT)::iT where {T<:Base.IEEEFloat,iT<:Unsigned} - @assert uint(T) == iT + exponent = convert(uint(T), exponent) + mantissa = convert(uint(T), mantissa) # The unbiased exponent is <= 0 if exponent <= bias(T) # reverse the bits of the mantissa in place From 54b1c8d3dcb0cce5a78df81cc143f573e1880b1e Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 12:03:16 -0500 Subject: [PATCH 12/17] simplify float encoding test code --- test/runtests.jl | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 8e36ab9..3e343cc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -535,14 +535,12 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 end function roundtrip_encoding(f) - assume!(!signbit(f)) encoded = FloatEncoding.float_to_lex(f) decoded = FloatEncoding.lex_to_float(T, encoded) reinterpret(iT, decoded) == reinterpret(iT, f) end - roundtrip_examples = map(Data.Just, - T[ + @testset for f in T[ 0.0, 2.5, 8.000000000000007, @@ -550,9 +548,11 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 2.0, 1.9999999999999998, 1.0 - ]) - @check roundtrip_encoding(Data.OneOf(roundtrip_examples...)) - @check roundtrip_encoding(Data.Floats{T}(; minimum=zero(T))) + ] + @test roundtrip_encoding(f) + end + + @check roundtrip_encoding(Data.Floats{T}(; minimum=nextfloat(zero(T)))) @testset "Ordering" begin function order_integral_part(n, g) @@ -581,8 +581,7 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 end @check function fractional_floats_greater_than_1( - f=Data.Floats{T}(; minimum=zero(T), maximum=one(T), nans=false)) - assume!(0 < f < 1) + f=Data.Floats{T}(; minimum=nextfloat(zero(T)), maximum=prevfloat(one(T)), nans=false)) FloatEncoding.float_to_lex(f) > FloatEncoding.float_to_lex(one(T)) end end From 3d0e21119f9e3d08a1148737d99cfccdbe7d0b84 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Thu, 18 Jul 2024 12:21:35 -0500 Subject: [PATCH 13/17] move floating point utilities to FloatEncoding --- src/data.jl | 8 ++-- src/float.jl | 96 +++++++++++++++++++++++++++++++++++++++++++++++- src/util.jl | 54 --------------------------- test/runtests.jl | 8 ++-- 4 files changed, 103 insertions(+), 63 deletions(-) diff --git a/src/data.jl b/src/data.jl index 3fce06f..3b68533 100644 --- a/src/data.jl +++ 
b/src/data.jl @@ -39,7 +39,7 @@ as well as these utility functions: module Data using Supposition -using Supposition: smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject +using Supposition: FloatEncoding, smootherstep, lerp, TestCase, choice!, weighted!, forced_choice!, reject using Supposition.FloatEncoding: lex_to_float using RequiredInterfaces: @required using StyledStrings: @styled_str @@ -1452,7 +1452,7 @@ end function produce!(tc::TestCase, f::Floats{T}) where {T} - iT = Supposition.uint(T) + iT = FloatEncoding.uint(T) bits = produce!(tc, Integers{iT}()) @@ -1478,8 +1478,8 @@ function float_remap(num::T, _min::T, _max::T) where T <: AbstractFloat # We're outside of the desired bounds, so use the mantissa # to resample the actual range range_size = min(_max - _min, floatmax(T)) - _, _, mantissa = Supposition.tear(num) - max_mantissa = oftype(mantissa, (2^Supposition.fracsize(T)) - 1) + _, _, mantissa = FloatEncoding.tear(num) + max_mantissa = oftype(mantissa, (2^FloatEncoding.fracsize(T)) - 1) num = _min + range_size * (mantissa / max_mantissa) # ensure the value is still in the desired range diff --git a/src/float.jl b/src/float.jl index 638b6b3..cee1052 100644 --- a/src/float.jl +++ b/src/float.jl @@ -1,5 +1,99 @@ +""" + FloatEncoding + +This module includes utilities for maniuplating floating point numbers in an IEEE 754 encoding: + +Additionally, it implements an encoding for floating point numbers that has better shrinking properties, ported from hypothesis. See [`lex_to_float`](@ref) for more details. +""" module FloatEncoding -using Supposition: uint, tear, bias, fracsize, exposize, max_exponent, assemble + +""" + uint(::Type{F}) + +Returns the unsigned integer type that can hold the bit pattern of a floating point number of type `F`. +""" +uint(::Type{Float16}) = UInt16 +""" + uint(::F) + +Returns the zero value of the unsigned integer type that can hold the bit pattern of a floating point number of type `F`. +""" +uint(::Float16) = zero(UInt16) +uint(::Type{Float32}) = UInt32 +uint(::Float32) = zero(UInt32) +uint(::Type{Float64}) = UInt64 +uint(::Float64) = zero(UInt64) + +""" + fracsize(::Type{F}) + +Returns the number of bits in the fractional part of a floating point number of type `F`. +""" +fracsize(::Type{Float16}) = 10 +fracsize(::Type{Float32}) = 23 +fracsize(::Type{Float64}) = 52 + + +""" + exposize(::Type{F}) + +Returns the number of bits in the exponent part of a floating point number of type `F`. +""" +exposize(::Type{Float16}) = 5 +exposize(::Type{Float32}) = 8 +exposize(::Type{Float64}) = 11 + +""" + max_exponent(::Type{F}) + +The maximum value of the exponent bits of a floating point number of type `F`. +""" +max_exponent(::Type{T}) where {T<:Base.IEEEFloat} = (1 << exposize(T) - 1) % uint(T) +""" + bias(::Type{F}) + +The IEEE 754 bias of the exponent bits of a floating point number of type `F`. +""" +bias(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << (exposize(T) - 1) - 1) + +function masks(::Type{T}) where {T<:Base.IEEEFloat} + ui = uint(T) + signbitmask = one(ui) << (8 * sizeof(ui) - 1) + fracbitmask = (-1 % ui) >> (8 * sizeof(ui) - fracsize(T)) + expobitmask = ((-1 % ui) >> (8 * sizeof(ui) - exposize(T))) << fracsize(T) + signbitmask, expobitmask, fracbitmask +end + +""" + assemble(::T, sign::I, expo::I, frac::I) where {I, T <: Union{Float16, Float32, Float64}} -> T + +Assembles `sign`, `expo` and `frac` arguments into the floating point number of type `T` it represents. +`sizeof(T)` must match `sizeof(I)`. 
+""" +function assemble(::Type{T}, sign::I, expo::I, frac::I) where {I,T<:Base.IEEEFloat} + sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the other arguments of type `I`!")) + signmask, expomask, fracmask = masks(T) + sign = (sign << (exposize(T) + fracsize(T))) & signmask + expo = (expo << fracsize(T)) & expomask + frac = frac & fracmask + ret = sign | expo | frac + reinterpret(T, ret) +end + +""" + tear(x::T) where T <: Union{Float16, Float32, Float64} -> Tuple{I, I, I} + +Returns the sign, exponent and fractional parts of a floating point number. +The returned tuple consists of three unsigned integer types `I` of the same bitwidth as `T`. +""" +function tear(x::T) where {T<:Base.IEEEFloat} + signbitmask, expobitmask, fracbitmask = masks(T) + ur = reinterpret(uint(T), x) + s = (ur & signbitmask) >> (exposize(T) + fracsize(T)) + e = (ur & expobitmask) >> fracsize(T) + f = (ur & fracbitmask) >> 0x0 + s, e, f +end """ exponent_key(T, e) diff --git a/src/util.jl b/src/util.jl index 768e32b..ffd38b4 100644 --- a/src/util.jl +++ b/src/util.jl @@ -13,60 +13,6 @@ function windows(array, a,b) head, middle, tail end -uint(::Type{Float16}) = UInt16 -uint(:: Float16) = zero(UInt16) -uint(::Type{Float32}) = UInt32 -uint(:: Float32) = zero(UInt32) -uint(::Type{Float64}) = UInt64 -uint(:: Float64) = zero(UInt64) -fracsize(::Type{Float16}) = 10 -fracsize(::Type{Float32}) = 23 -fracsize(::Type{Float64}) = 52 -exposize(::Type{Float16}) = 5 -exposize(::Type{Float32}) = 8 -exposize(::Type{Float64}) = 11 - -max_exponent(::Type{T}) where {T<:Base.IEEEFloat} = (1 << exposize(T) - 1) % uint(T) -bias(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << (exposize(T) - 1) - 1) - -function masks(::Type{T}) where T <: Base.IEEEFloat - ui = uint(T) - signbitmask = one(ui) << (8*sizeof(ui)-1) - fracbitmask = (-1 % ui) >> (8*sizeof(ui)-fracsize(T)) - expobitmask = ((-1 % ui) >> (8*sizeof(ui)-exposize(T))) << fracsize(T) - signbitmask, expobitmask, fracbitmask -end - -""" - assemble(::T, sign::I, expo::I, frac::I) where {I, T <: Union{Float16, Float32, Float64}} -> T - -Assembles `sign`, `expo` and `frac` arguments into the floating point number of type `T` it represents. -`sizeof(T)` must match `sizeof(I)`. -""" -function assemble(::Type{T}, sign::I, expo::I, frac::I) where {I, T <: Base.IEEEFloat} - sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the other arguments of type `I`!")) - signmask, expomask, fracmask = masks(T) - sign = (sign << (exposize(T) + fracsize(T))) & signmask - expo = (expo << fracsize(T)) & expomask - frac = frac & fracmask - ret = sign | expo | frac - return reinterpret(T, ret) -end - -""" - tear(x::T) where T <: Union{Float16, Float32, Float64} -> Tuple{I, I, I} - -Returns the sign, exponent and fractional parts of a floating point number. -The returned tuple consists of three unsigned integer types `I` of the same bitwidth as `T`. 
-""" -function tear(x::T) where T <: Base.IEEEFloat - signbitmask, expobitmask, fracbitmask = masks(T) - ur = reinterpret(uint(T), x) - s = (ur & signbitmask) >> (exposize(T) + fracsize(T)) - e = (ur & expobitmask) >> fracsize(T) - f = (ur & fracbitmask) >> 0x0 - return (s, e, f) -end lerp(x,y,t) = y*t + x*(1-t) function smootherstep(a, b, t) diff --git a/test/runtests.jl b/test/runtests.jl index 3e343cc..2324e01 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -517,12 +517,12 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 @testset "Floating point encoding" begin @testset for T in (Float16, Float32, Float64) - iT = Supposition.uint(T) + iT = FloatEncoding.uint(T) # These invariants are ported from Hypothesis @testset "Exponent encoding" begin # The highest value here is 0x7ff (2047) with Int64, # so we can just exhaustively test all 2048 possibilities - exponents = zero(iT):Supposition.max_exponent(T) + exponents = zero(iT):FloatEncoding.max_exponent(T) # Round tripping @test all(exponents) do e @@ -568,7 +568,7 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 @check order_integral_part( Data.Floats{T}(; minimum=one(T), - maximum=T(2^(Supposition.fracsize(T) + 1)), + maximum=T(2^(FloatEncoding.fracsize(T) + 1)), nans=false), Data.Floats{T}(; minimum=nextfloat(zero(T)), maximum=prevfloat(one(T)), nans=false)) @@ -1369,7 +1369,7 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 @testset for T in (Float16, Float32, Float64) @check function floatfunc(f=Data.Floats{T}()) orig = bitstring(f) - reassembled = bitstring(Supposition.assemble(T, Supposition.tear(f)...)) + reassembled = bitstring(FloatEncoding.assemble(T, FloatEncoding.tear(f)...)) orig == reassembled end end From c1555cb7d512e09ce3609d5c8968bfd79ae0b652 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Fri, 19 Jul 2024 08:08:39 -0500 Subject: [PATCH 14/17] clean up and clarify docstrings for FloatEncoding Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- src/float.jl | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/float.jl b/src/float.jl index cee1052..5c3a538 100644 --- a/src/float.jl +++ b/src/float.jl @@ -1,23 +1,22 @@ """ FloatEncoding -This module includes utilities for maniuplating floating point numbers in an IEEE 754 encoding: +This module includes utilities for manipulating floating point numbers in IEEE 754 encoding, focusing on the standard Julia types `Float16`, `Float32` and `Float64`. Additionally, it implements an encoding for floating point numbers that has better shrinking properties, ported from hypothesis. See [`lex_to_float`](@ref) for more details. """ module FloatEncoding """ - uint(::Type{F}) + uint(::Type{F}) where F <: Base.IEEEFloat + uint(::F) where F <: Base.IEEEFloat -Returns the unsigned integer type that can hold the bit pattern of a floating point number of type `F`. +When given a type, returns the unsigned integer type `T` that can hold the bit pattern of a floating point number of type `F`. +When given an object, returns `zero(T)`. """ -uint(::Type{Float16}) = UInt16 -""" - uint(::F) +function uint end -Returns the zero value of the unsigned integer type that can hold the bit pattern of a floating point number of type `F`. 
-""" +uint(::Type{Float16}) = UInt16 uint(::Float16) = zero(UInt16) uint(::Type{Float32}) = UInt32 uint(::Float32) = zero(UInt32) @@ -46,13 +45,15 @@ exposize(::Type{Float64}) = 11 """ max_exponent(::Type{F}) -The maximum value of the exponent bits of a floating point number of type `F`. +The maximum value of the exponent bits of a floating point number of type `F`, given as a `uint(F)`. """ max_exponent(::Type{T}) where {T<:Base.IEEEFloat} = (1 << exposize(T) - 1) % uint(T) """ bias(::Type{F}) The IEEE 754 bias of the exponent bits of a floating point number of type `F`. + +See also [Wikipedia: Exponent bias](https://en.wikipedia.org/wiki/Exponent_bias). """ bias(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << (exposize(T) - 1) - 1) @@ -98,9 +99,11 @@ end """ exponent_key(T, e) -A lexicographical ordering for floating point exponents. The encoding is ported +A lexicographical ordering for floating point exponents `e`, for the given floating point type `T`. The encoding is ported from Hypothesis. +`e` is expected to be in the closed interval `[0, max_exponent(T)]`. + The ordering is * non-negative exponents in increasing order @@ -131,7 +134,7 @@ end _make_encoding_table(T) Build a look up table for encoding exponents of floating point numbers of type `T`. -For a floating point type `T`, the lookup table is a permutation of the unsigned integers of type [`uint`](@ref) from `0` to `max_exponent(T)`. +For a floating point type `T`, the lookup table is a permutation of the unsigned integers of type [`uint(T)`](@ref) from `0` to `max_exponent(T)`. This allows the reordering of the exponent bits of a floating point number according to the encoding described in [`exponent_key`](@ref). """ @@ -143,6 +146,8 @@ _make_encoding_table(T) = sort(zero(uint(T)):max_exponent(T); A dictionary mapping `Unsigned` types to encoding tables for exponents. The encoding is described in [`exponent_key`](@ref) and is ported from Hypothesis. +Currently, only `UInt16`, `UInt32` and `UInt64` are available. + # See Also [`encode_exponent`](@ref) @@ -186,6 +191,8 @@ end A dictionary mapping `Unsigned` types to decoding tables for exponents. The encoding is described in [`exponent_key`](@ref) and is ported from Hypothesis. +Currently, only `UInt16`, `UInt32` and `UInt64` are available. + # See Also [`decode_exponent`](@ref) @@ -246,8 +253,8 @@ end """ lex_to_float(T, bits) -Reinterpret the bits of a floating point number using an encoding with better shrinking -properties. +Convert the given `bits` of the lexicographical encoding to a floating point number of type `T`. +The bitwidth of `T` must match `sizeof(bits)`. This produces a non-negative floating point number, possibly including `NaN` or `Inf`. The encoding is ported from Hypothesis, and has the property that lexicographically smaller @@ -271,7 +278,6 @@ If the sign bit is not set: ## Extended Help See the [reference implementation](https://github.com/HypothesisWorks/hypothesis/blob/aad70fb2d9dec2cef9719cdf5369eec9fae0d2a4/hypothesis-python/src/hypothesis/internal/conjecture/floats.py#L176) in Hypothesis. - """ function lex_to_float(::Type{T}, bits::I)::T where {I,T<:Base.IEEEFloat} sizeof(T) == sizeof(I) || throw(ArgumentError("The bitwidth of `$T` needs to match the bitwidth of the given bits!")) @@ -296,11 +302,13 @@ Encoding a floating point number as a bit pattern. This is essentially the inverse of [`lex_to_float`](@ref) and produces a bit pattern that is lexicographically smaller for 'simpler' floats. 
-Note that while `lex_to_float` can produce any valid positive floating point number, it is not injective. So combined with the fact that positive and negative floats map to the same bit pattern, -`float_to_lex` is not an exact inverse of `lex_to_float`. +!!! note "Injectivity" + Note that while `lex_to_float` can produce any valid positive floating point number, it is not injective. + So combined with the fact that positive and negative floats map to the same bit pattern, `float_to_lex` + is not an exact inverse of `lex_to_float`. """ function float_to_lex(f::T) where {T<:Base.IEEEFloat} - # If the float is simple, we can just reinterpret it as an integer + # If the float is simple, we can just convert it to an integer # This corresponds to the latter branch of lex_to_float if is_simple_float(f) uint(T)(f) @@ -312,7 +320,7 @@ end """ is_simple_float(f) -`f` is simple if it is integral and the first byte is all zeros. +Return whether the given floating point number is considered "simple" in terms of the lexicographic encoding. """ function is_simple_float(f::T) where {T<:Base.IEEEFloat} if trunc(f) != f From 9f39ce8c27d5089277f221ee93c07f8e636a8e90 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Fri, 19 Jul 2024 08:10:10 -0500 Subject: [PATCH 15/17] refactor claculation of bias Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- src/float.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/float.jl b/src/float.jl index 5c3a538..be7a62d 100644 --- a/src/float.jl +++ b/src/float.jl @@ -55,7 +55,7 @@ The IEEE 754 bias of the exponent bits of a floating point number of type `F`. See also [Wikipedia: Exponent bias](https://en.wikipedia.org/wiki/Exponent_bias). """ -bias(::Type{T}) where {T<:Base.IEEEFloat} = uint(T)(1 << (exposize(T) - 1) - 1) +bias(::Type{T}) where {T<:Base.IEEEFloat} = (1 << (exposize(T) - 1) - 1) % uint(T) function masks(::Type{T}) where {T<:Base.IEEEFloat} ui = uint(T) From add36a1c363e936bd159654a1b01c763941abb86 Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Fri, 19 Jul 2024 08:53:44 -0500 Subject: [PATCH 16/17] refactor encoding tests for NaN edgecases The test was previously failing because NaNs were being generated with the signbit set, but lex_to_float only generates floats with an unset signbit. Refactored the tests to test for all "positive" NaNs, separately from other positive float. --- test/runtests.jl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 2324e01..71f3348 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -534,6 +534,8 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 end end + # check that the encoding roundtrips properly + # N.B. 
this property only holds for floats with the signbit unset function roundtrip_encoding(f) encoded = FloatEncoding.float_to_lex(f) decoded = FloatEncoding.lex_to_float(T, encoded) @@ -552,7 +554,16 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 @test roundtrip_encoding(f) end - @check roundtrip_encoding(Data.Floats{T}(; minimum=nextfloat(zero(T)))) + @check roundtrip_encoding(Data.Floats{T}(; minimum=nextfloat(zero(T)), nans=false)) + + # sample from all possible NaN bit patterns + # with the signbit unset + nan_gen = + map(bits -> + FloatEncoding.assemble(T, zero(iT), FloatEncoding.max_exponent(T), bits), + Data.Integers(zero(iT), (1 << FloatEncoding.fracsize(T) - 1) % iT)) + + @check roundtrip_encoding(nan_gen) @testset "Ordering" begin function order_integral_part(n, g) From e6942696572a975f03909a566dbf3408d8a4d81c Mon Sep 17 00:00:00 2001 From: Zach Raines Date: Fri, 19 Jul 2024 09:36:05 -0500 Subject: [PATCH 17/17] simplify encoding testing for nans Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- test/runtests.jl | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 71f3348..a5270cb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -554,16 +554,12 @@ const verb = VERSION.major == 1 && VERSION.minor < 11 @test roundtrip_encoding(f) end - @check roundtrip_encoding(Data.Floats{T}(; minimum=nextfloat(zero(T)), nans=false)) - - # sample from all possible NaN bit patterns - # with the signbit unset - nan_gen = - map(bits -> - FloatEncoding.assemble(T, zero(iT), FloatEncoding.max_exponent(T), bits), - Data.Integers(zero(iT), (1 << FloatEncoding.fracsize(T) - 1) % iT)) - - @check roundtrip_encoding(nan_gen) + # `roundtrip_encoding` assumes the signbit is unset + roundtrip_gen = map(Data.Floats{T}()) do f + _, exp, frac = FloatEncoding.tear(f) + FloatEncoding.assemble(T, zero(iT), exp, frac) + end + @check roundtrip_encoding(roundtrip_gen) @testset "Ordering" begin function order_integral_part(n, g)
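
Taken together, the series ports Hypothesis' lexicographic float encoding: the exponent permutation puts non-negative unbiased exponents first, then negative exponents in decreasing order, with the all-ones (Inf/NaN) exponent last, so that smaller bit patterns decode to simpler values. The core of that permutation can be reproduced standalone; the following is a simplified Float16-sized sketch mirroring `exponent_key` and `_make_encoding_table`, not the package code itself:

```julia
# Simplified, standalone sketch of the exponent reordering for Float16
# (mirrors `exponent_key` / `_make_encoding_table`; not the package code).
exposize = 5                          # Float16 has 5 exponent bits
bias     = (1 << (exposize - 1)) - 1  # 15
max_e    = (1 << exposize) - 1        # 31, the all-ones exponent (Inf/NaN)

# non-negative unbiased exponents first, then negatives in decreasing order,
# then the Inf/NaN exponent last
key(e) = e == max_e ? Inf : (e < bias ? bias - (e - bias) : e - bias)
table = sort(0:max_e; by = key)

@assert table[1:4] == [15, 16, 17, 18]  # unbiased exponents 0, 1, 2, 3 come first
@assert table[end] == 31                # the Inf/NaN exponent shrinks away last
```

The same table, built per type and inverted for decoding, is what `ENCODING_TABLE`/`DECODING_TABLE` cache in `src/float.jl`, which is why shrinking drives floats towards small integral values near 1.0 before ever reaching subnormals, infinities, or NaN.
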