Merge pull request #36 from milankl/fastfloat16

fastfloat16sr first implementation
milankl · Feb 7, 2021 · 75565a8 · 75565a8
2 parents 69d6c85 + 5eb5d87
commit 75565a8
Show file tree

Hide file tree

Showing 10 changed files with 1,340 additions and 12 deletions.
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -0,0 +1,28 @@
+name: CI  # workflow with a single test job
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - main
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}  # one run per entry of the os list below
+    strategy:
+      matrix:
+        julia-version: ["1.0", "1.5", nightly]  # passed to setup-julia as version
+        julia-arch: [x64]  # passed to setup-julia as arch
+        os: [ubuntu-latest, windows-latest, macOS-latest]
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: "Set up Julia"
+        uses: julia-actions/setup-julia@latest
+        with:
+          version: ${{ matrix.julia-version }}
+          arch: ${{ matrix.julia-arch }}
+      - uses: julia-actions/julia-runtest@latest
+      - uses: julia-actions/julia-uploadcodecov@latest
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}  # read from the repository secrets
diff --git a/Project.toml b/Project.toml
@@ -1,16 +1,18 @@
 name = "StochasticRounding"
 uuid = "3843c9a1-1f18-49ff-9d99-1b4c8a8e97ed"
 authors = ["Milan Kloewer"]
-version = "0.4.1"
+version = "0.5.0"
 
 [deps]
 BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
+FastFloat16s = "ecdfd59a-7c14-409c-ab7a-3704f8e92dd6"
 RandomNumbers = "e6cf234a-135c-5ec9-84dd-332b85af5143"
 
 [compat]
+BFloat16s = "^0.1"
+FastFloat16s = "^0.1"
 RandomNumbers = "^1.4"
 julia = "^1"
-BFloat16s = "^0.1"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

diff --git a/README.md b/README.md
@@ -4,10 +4,10 @@
 
 Stochastic rounding for floating-point arithmetic.
 
-This package exports `Float32sr`,`Float16sr` and `BFloat16sr`. Three number formats that behave like their deterministic counterparts but
-with stochastic rounding that is proportional to the distance of the next representable numbers and therefore
-[exact in expectation](https://en.wikipedia.org/wiki/Rounding#Stochastic_rounding) (see also example below in "Usage"). 
-Although there is currently no known hardware implementation available, 
+This package exports `Float32sr`,`Float16sr`,`FastFloat16sr` and `BFloat16sr`, four number formats that behave
+like their deterministic counterparts but with stochastic rounding that is proportional to the distance of the
+next representable numbers and therefore [exact in expectation](https://en.wikipedia.org/wiki/Rounding#Stochastic_rounding)
+(see also example below in "Usage").  Although there is currently no known hardware implementation available, 
 [Graphcore is working on IPUs with stochastic rounding](https://www.graphcore.ai/posts/directions-of-ai-research). 
 Stochastic rounding makes the number formats considerably slower, but e.g. Float32+stochastic rounding is only 
 about 2x slower than Float64. [Xoroshiro128Plus](https://sunoru.github.io/RandomNumbers.jl/stable/man/xorshifts/#Xorshift-Family-1), 
@@ -16,6 +16,9 @@ a random number generator from the [Xorshift family](https://en.wikipedia.org/wi
 
 You are welcome to raise [issues](https://github.com/milankl/StochasticRounding.jl/issues), ask questions or suggest any changes or new features.
 
+`BFloat16sr` is based on [BFloat16s.jl](https://github.com/JuliaMath/BFloat16s.jl)   
+`FastFloat16sr` is based on [FastFloat16s.jl](https://github.com/milankl/FastFloat16s.jl)
+
 ### Usage
 
 ```julia
@@ -66,10 +69,10 @@ julia> B1,B2 = Float32sr.(A1),Float32sr.(A2);
 ```
 And similarly for the other number types. Then on an Intel(R) Core(R) i5 (Ice Lake) @ 1.1GHz timings via `@btime +($A1,$A2)` etc. are
 
-| rounding mode         | Float32    | BFloat16   | Float64   | Float16   |
-| --------------------- | ---------- | ---------- | --------- | --------- |
-| default               | 460 μs     | 556 μs     | 1.151ms   | 16.446 ms |
-| + stochastic rounding | 2.585 ms   | 3.820 ms   | n/a       | 20.714 ms |
+| rounding mode         | Float32    | BFloat16   | Float64   | [FastFloat16](https://github.com/milankl/FastFloat16s.jl) | Float16   |
+| --------------------- | ---------- | ---------- | --------- | ----------- | --------- |
+| default               | 460 μs     | 556 μs     | 1.151ms   | 629 μs      | 16.446 ms |
+| + stochastic rounding | 2.585 ms   | 3.820 ms   | n/a       | 4.096 ms    | 20.714 ms |
 
 Stochastic rounding makes Float32/BFloat16 roughly 5-7x slower, but its cost is almost negligible for Float16. 
 For Float32sr about 50% of the time is spent on the random number generation, a bit less than 50% on the addition in

diff --git a/figs/drawing.svg b/figs/drawing.svg
diff --git a/figs/stochastic_rounding_schematic2.xcf b/figs/stochastic_rounding_schematic2.xcf
diff --git a/src/StochasticRounding.jl b/src/StochasticRounding.jl
@@ -5,7 +5,9 @@ module StochasticRounding
         Float16sr,Float16_stochastic_round,
         Float16_chance_roundup,NaN16sr,Inf16sr,
         Float32sr,Float32_stochastic_round,
-        Float32_chance_roundup,NaN32sr,Inf32sr
+        Float32_chance_roundup,NaN32sr,Inf32sr,
+        FastFloat16sr,FastFloat16_stochastic_round,
+        FastFloat16_chance_roundup,NaNF16sr,InfF16sr
 
 	import Base: isfinite, isnan, precision, iszero,
         sign_mask, exponent_mask, significand_mask,
@@ -40,5 +42,6 @@ module StochasticRounding
     include("bfloat16sr.jl")
     include("float16sr.jl")
     include("float32sr.jl")
+    include("fast_float16sr.jl")
 
 end
diff --git a/src/fast_float16sr.jl b/src/fast_float16sr.jl
@@ -0,0 +1,142 @@
+import FastFloat16s.FastFloat16
+
+"""The FastFloat16 + stochastic rounding type, stored in 32 bits."""
+primitive type FastFloat16sr <: AbstractFloat 32 end
+
+# basic properties: bit masks over the 32-bit storage and the precision in bits
+sign_mask(::Type{FastFloat16sr}) = 0x80000000           # highest bit
+exponent_mask(::Type{FastFloat16sr}) = 0x7f800000       # the 8 bits below the sign
+significand_mask(::Type{FastFloat16sr}) = 0x007fffff    # the lowest 23 bits
+precision(::Type{FastFloat16sr}) = 11
+
+one(::Type{FastFloat16sr}) = reinterpret(FastFloat16sr,one(Float32))	# bits of 1f0
+zero(::Type{FastFloat16sr}) = reinterpret(FastFloat16sr,0x0000_0000)	# all bits unset
+one(::FastFloat16sr) = one(FastFloat16sr)		# instances forward to the type method
+zero(::FastFloat16sr) = zero(FastFloat16sr)		# fixed: misspelled type name threw UndefVarError
+
+typemin(::Type{FastFloat16sr}) = FastFloat16sr(typemin(Float16))	# limits are taken from Float16
+typemax(::Type{FastFloat16sr}) = FastFloat16sr(typemax(Float16))	# and returned as FastFloat16sr
+floatmin(::Type{FastFloat16sr}) = FastFloat16sr(floatmin(Float16))	# (previously returned Float32sr)
+floatmax(::Type{FastFloat16sr}) = FastFloat16sr(floatmax(Float16))	# fixed: undefined name FastFlaot16
+
+# called on an instance, the limits forward to the methods defined on the type
+for limit in (:typemin, :typemax, :floatmin, :floatmax)
+    @eval $limit(::FastFloat16sr) = $limit(FastFloat16sr)
+end
+
+# machine epsilon of the type is the one of Float16
+function eps(::Type{FastFloat16sr})
+    return FastFloat16sr(eps(Float16))
+end
+
+# spacing at x is looked up after converting x to Float16
+function eps(x::FastFloat16sr)
+    spacing = eps(Float16(x))
+    return FastFloat16sr(spacing)
+end
+
+# special values share their bit patterns with Inf32 and NaN32
+const InfF16sr = reinterpret(FastFloat16sr, Inf32)
+const NaNF16sr = reinterpret(FastFloat16sr, NaN32)
+
+# basic operations
+# abs, isnan and isfinite are evaluated on the Float32 reading of the bits
+function abs(x::FastFloat16sr)
+    magnitude = abs(Float32(x))
+    return reinterpret(FastFloat16sr, magnitude)
+end
+
+function isnan(x::FastFloat16sr)
+    return isnan(Float32(x))
+end
+
+function isfinite(x::FastFloat16sr)
+    return isfinite(Float32(x))
+end
+
+# neighbouring values are looked up in Float16 and converted back
+function nextfloat(x::FastFloat16sr)
+    return FastFloat16sr(nextfloat(Float16(x)))
+end
+
+function prevfloat(x::FastFloat16sr)
+    return FastFloat16sr(prevfloat(Float16(x)))
+end
+
+# negation flips the sign bit and leaves all other bits untouched
+function -(x::FastFloat16sr)
+    flipped = reinterpret(UInt32, x) ⊻ sign_mask(FastFloat16sr)
+    return reinterpret(FastFloat16sr, flipped)
+end
+
+# conversions
+# FastFloat16sr is reinterpreted to and from Float32 and FastFloat16
+Float32(x::FastFloat16sr) = reinterpret(Float32,x)
+FastFloat16(x::FastFloat16sr) = reinterpret(FastFloat16,x)
+FastFloat16sr(x::FastFloat16) = reinterpret(FastFloat16sr,x)
+
+# a Float32 is first converted by the FastFloat16 constructor;
+# Float16, Float64 and integers are converted to Float32 beforehand
+FastFloat16sr(x::Float32) = FastFloat16sr(FastFloat16(x))
+for T in (:Float16, :Float64, :Integer)
+    @eval FastFloat16sr(x::$T) = FastFloat16sr(Float32(x))
+end
+
+# conversions to other number types go through Float32
+for T in (:Float16, :Float64)
+    @eval $T(x::FastFloat16sr) = $T(Float32(x))
+end
+(::Type{T})(x::FastFloat16sr) where {T<:Integer} = T(Float32(x))
+
+"""Convert to FastFloat16sr from Float32 with stochastic rounding: a random integer
+is added to the bit pattern of `x`, which is then converted with FastFloat16sr(::Float32)."""
+function FastFloat16_stochastic_round(x::Float32)
+	ix = reinterpret(Int32,x)
+	# return zero early when, sign aside, only the lowest 12 bits of x are set:
+	# ix + r below could otherwise wrap across the sign bit into a NaN bit pattern
+	# << 1 pushes the sign bit out to the left
+	# >> 13 then pushes the lowest 12 bits out to the right
+	((ix << 1) >> 13) == zero(Int32) && return zero(FastFloat16sr)
+
+	# rand yields 32 random bits as Int32; >> 19 is an arithmetic shift that
+	# keeps the upper 13 of them and fills the rest with 0s or 1s (sign extension)
+	# so that r lies in [-2^12,2^12). With 10 of the 23 Float32 significand bits kept
+	# one ulp is 2^13 in the bit pattern, hence r corresponds to [-ulp/2,ulp/2)
+	# which is added with integer arithmetic; this is the stochastic perturbation.
+	# Then deterministic round to nearest to either round up or round down.
+	r = rand(Xor128[],Int32) >> 19   # 19 = 1 sign + 8 exponent + 10 significand bits; Xor128 is not defined in this file
+	xr = reinterpret(Float32,ix + r)
+	return FastFloat16sr(xr)			# round to nearest
+end
+
+# # Promotion with other floats (disabled, kept as in the other number formats)
+# promote_rule(::Type{Float16}, ::Type{FastFloat32sr}) = Float32
+# promote_rule(::Type{Float64}, ::Type{Float32sr}) = Float64
+
+# mixed with any of the built-in integer types the result is FastFloat16sr
+const _signed_ints = (Int8, Int16, Int32, Int64, Int128)
+const _unsigned_ints = (UInt8, UInt16, UInt32, UInt64, UInt128)
+for T in (_signed_ints..., _unsigned_ints...)
+    @eval begin
+        promote_rule(::Type{FastFloat16sr}, ::Type{$T}) = FastFloat16sr
+    end
+end
+
+# Rounding: round in Float32 with the requested mode, then convert back
+for mode in (:ToZero, :Down, :Up, :Nearest)
+    @eval function round(x::FastFloat16sr, r::RoundingMode{$(QuoteNode(mode))})
+        return FastFloat16sr(round(Float32(x), r))
+    end
+end
+
+# Comparison: every operator is evaluated on the Float32 values of both arguments
+for cmp in (:(==), :<, :<=, :isless)
+    @eval function ($cmp)(a::FastFloat16sr, b::FastFloat16sr)
+        return ($cmp)(Float32(a), Float32(b))
+    end
+end
+
+# Arithmetic: compute in Float32, then round the result stochastically
+for op in (:+, :-, :*, :/, :^)
+    @eval function ($op)(x::FastFloat16sr, y::FastFloat16sr)
+        exact = ($op)(Float32(x), Float32(y))
+        return FastFloat16_stochastic_round(exact)
+    end
+end
+
+# unary math functions, same scheme as the arithmetic above
+const _unary_math = (:sin,:cos,:tan,:asin,:acos,:atan,
+                     :sinh,:cosh,:tanh,:asinh,:acosh,:atanh,
+                     :exp,:exp2,:exp10,:expm1,
+                     :log,:log2,:log10,:sqrt,:cbrt,:log1p)
+
+for func in _unary_math
+    @eval Base.$func(a::FastFloat16sr) = FastFloat16_stochastic_round($func(Float32(a)))
+end
+
+# math functions of two arguments
+for func in (:atan,:hypot)
+    @eval function $func(a::FastFloat16sr,b::FastFloat16sr)
+        return FastFloat16_stochastic_round($func(Float32(a),Float32(b)))
+    end
+end
+
+
+
+# Showing
+# Infinities and NaN print as their constant names, everything else
+# as FastFloat16sr(...) wrapped around the printed Float32 value.
+function show(io::IO, x::FastFloat16sr)
+    isinf(x) && return print(io, x < 0 ? "-InfF16sr" : "InfF16sr")
+    isnan(x) && return print(io, "NaNF16sr")
+
+    # string() prints into a fresh buffer, independent of the properties of io
+    digits = string(Float32(x))
+    return print(io, "FastFloat16sr(" * digits * ")")
+end
+
+# all 32 bits of x as a string of 0s and 1s
+function bitstring(x::FastFloat16sr)
+    return bitstring(reinterpret(UInt32,x))
+end
+
+# with mode == :split blanks separate the 1st bit (sign), bits 2-9 (exponent)
+# and bits 10-32 (significand); any other mode returns the plain bitstring
+function bitstring(x::FastFloat16sr,mode::Symbol)
+    bits = bitstring(x)
+    mode == :split || return bits
+    return string(bits[1], " ", bits[2:9], " ", bits[10:end])
+end
diff --git a/src/float32sr.jl b/src/float32sr.jl
@@ -32,7 +32,7 @@ const Inf32sr = reinterpret(Float32sr, Inf32)
 const NaN32sr = reinterpret(Float32sr, NaN32)
 
 # basic operations
-abs(x::Float32sr) = reinterpret(Float32sr, abs(reinterpret(Float32)))
+abs(x::Float32sr) = reinterpret(Float32sr, abs(reinterpret(Float32,x)))
 isnan(x::Float32sr) = isnan(reinterpret(Float32,x))
 isfinite(x::Float32sr) = isfinite(reinterpret(Float32,x))