Skip to content

Commit

Permalink
Optimize pow5_12(::Float32) (#493)
Browse files Browse the repository at this point in the history
  • Loading branch information
kimikage authored Jun 23, 2021
1 parent 9710272 commit bf61512
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
3 changes: 2 additions & 1 deletion src/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ correct_gamut(c::CV) where {CV<:TransparentRGB} =
# Apply the sRGB companding function to the component value `v`.
#
# With the threshold `0.0031308`, the result is
#   * `1.055 * v^(1/2.4) - 0.055`  when `v` is above the threshold,
#   * `12.92 * v`                  otherwise.
#
# The arithmetic is carried out in `Float32` when `0.5f0v` is a `Float32`, and in
# `promote_type(Float64, typeof(v))` otherwise; the result has that type.
@inline function srgb_compand(v)
    F = typeof(0.5f0v) === Float32 ? Float32 : promote_type(Float64, typeof(v))
    vf = F(v)
    # Clamp the argument handed to `pow5_12` from below by the threshold. On the
    # branch that actually uses the power term `vf` exceeds the threshold, so
    # `vc == vf` there and the result is unchanged; the clamp only keeps
    # `pow5_12` away from values below the threshold.
    vc = @fastmath max(vf, F(0.0031308))
    # `pow5_12` is an optimized function to get `v^(1/2.4)`
    vf > F(0.0031308) ? muladd(F(1.055), F(pow5_12(vc)), F(-0.055)) : F(12.92) * vf
end

function _hsx_to_rgb(im::UInt8, v, n, m)
Expand Down
18 changes: 16 additions & 2 deletions src/utilities.jl
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ pow3_4(x) = (y = @fastmath(sqrt(x)); y*@fastmath(sqrt(y))) # x^(3/4)

# `pow5_12` is called from `srgb_compand`.
# Generic fallback computing `x^(5/12)` as `x^(3/4) / cbrt(x)`.
# `Float64` and `Float32` arguments are handled by the dedicated methods defined
# below, so no separate forwarding method for `Float32` is declared here.
pow5_12(x) = pow3_4(x) / cbrt(x) # 5/12 == 1/2 + 1/4 - 1/3 == 3/4 - 1/3
@inline function pow5_12(x::Float64)
@noinline _cbrt(x) = cbrt01(x)
p3_4 = pow3_4(x)
# x^(-1/6)
if x < 0.02
Expand All @@ -106,7 +106,7 @@ pow5_12(x::Float32) = Float32(pow5_12(Float64(x)))
t0 = @evalpoly(x, 1.7047813285940905, -3.1261253501167308,
7.498744828350077, -10.100319516746419, 6.820601476522508, -1.7978894213531524)
else
return p3_4 / cbrt(x)
return p3_4 / _cbrt(x)
end
# x^(-1/3)
t1 = t0 * t0
Expand All @@ -117,6 +117,20 @@ pow5_12(x::Float32) = Float32(pow5_12(Float64(x)))
# x^(3/4) * x^(-1/3)
muladd(p3_4, t2, p3_4 * t2h)
end
# `Float32` method of `pow5_12`: evaluates `x^(5/12)` as `x^(3/4) * x^(-1/3)`.
# Each factor is carried as a leading value plus a small correction term, and the
# final product combines the leading values with the first-order cross terms.
@inline function pow5_12(x::Float32)
# x^(-1/3)
# `rcbrt` is defined elsewhere; presumably an approximate reciprocal cube root
# of `x` -- TODO confirm its accuracy contract.
rc = rcbrt(x)
# Negated (rounded) product `rc * x`, reused twice below.
rcx = -rc * x
# Algebraically: (rc*x + rcx) * (-rc^2) + (rc^2 * rcx + 1) == 1 - x * rc^3.
# The inner `muladd(rc, x, rcx)` is the difference between `rc * x` and its
# rounded value; it is only non-zero when `muladd` is evaluated fused.
rch = muladd(muladd(rc, x, rcx), -rc^2, muladd(rc^2, rcx, 1.0f0)) # 1 - x * rc^3
# Correction to `rc`: since x * rc^3 == 1 - rch, x^(-1/3) == rc * (1 - rch)^(-1/3),
# and (1 - h)^(-1/3) ≈ 1 + h/3 + (2/9) h^2, so rc + rce ≈ x^(-1/3).
rce = muladd(2/9f0, rch, 1/3f0) * rch * rc
# x^(3/4)
# Computed in `Float64`, then split into a `Float32` head and a `Float32` tail.
p3_4_f64 = pow3_4(Float64(x))
# The mask clears the low 29 bits of the `Float64` bit pattern, leaving 23
# fraction bits (the width of a `Float32` fraction).
p3_4r = reinterpret(Float64, reinterpret(UInt64, p3_4_f64) & 0xffffffff_e0000000)
# Head: the truncated value converted to `Float32`.
p3_4 = Float32(p3_4r)
# Tail: what the truncation removed, rounded to `Float32`.
p3_4e = Float32(p3_4_f64 - p3_4r)
# x^(3/4) * x^(-1/3)
# (p3_4 + p3_4e) * (rc + rce) with the `p3_4e * rce` term omitted.
muladd(p3_4, rc, muladd(p3_4, rce, p3_4e * rc))
end

# `pow12_5` is called from `invert_srgb_compand`.
pow12_5(x) = pow12_5(Float64(x))
Expand Down

0 comments on commit bf61512

Please sign in to comment.