Skip to content

Commit

Permalink
add functions for doing saturated adds and subs
Browse files Browse the repository at this point in the history
  • Loading branch information
KristofferC committed Feb 21, 2020
1 parent 68c0b2d commit 154f08c
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 63 deletions.
31 changes: 21 additions & 10 deletions src/LLVM_intrinsics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,10 @@ suffix(N::Integer, ::Type{Ptr{T}}) where {T} = "v$(N)p0$(T<:IntegerTypes ? "i" :
suffix(N::Integer, ::Type{T}) where {T} = "v$(N)$(T<:IntegerTypes ? "i" : "f")$(8*sizeof(T))"
suffix(::Type{T}) where {T} = "$(T<:IntegerTypes ? "i" : "f")$(8*sizeof(T))"

llvm_name(llvmf, N, T) = string("llvm", ".", llvmf, ".", suffix(N, T))
llvm_name(llvmf, ::Type{LVec{N, T}}) where {N,T} = string("llvm", ".", llvmf, ".", suffix(N, T))
llvm_name(llvmf, ::Type{T}) where {T} = string("llvm", ".", llvmf, ".", suffix(T))
# Intrinsic names are spelled with underscores on the Julia side (e.g. `sadd_sat`)
# but LLVM expects dot-separated components (`sadd.sat`).
dotit(f) = replace(string(f), '_' => '.')

# Build the full LLVM intrinsic name "llvm.<name>.<type suffix>" for, respectively,
# an explicit lane count `N` and element type `T`, an `LVec{N, T}`, and a scalar `T`.
llvm_name(llvmf, N, T) = "llvm.$(dotit(llvmf)).$(suffix(N, T))"
llvm_name(llvmf, ::Type{LVec{N, T}}) where {N,T} = "llvm.$(dotit(llvmf)).$(suffix(N, T))"
llvm_name(llvmf, ::Type{T}) where {T} = "llvm.$(dotit(llvmf)).$(suffix(T))"

llvm_type(::Type{T}) where {T} = d[T]
llvm_type(::Type{LVec{N, T}}) where {N,T} = "< $N x $(d[T])>"
Expand Down Expand Up @@ -171,13 +172,23 @@ const BINARY_INTRINSICS_FLOAT = [
:round
]

for f in BINARY_INTRINSICS_FLOAT
@eval @generated function $(f)(x::T, y::T) where T<:LT{<:FloatingTypes}
ff = llvm_name($(QuoteNode(f)), T,)
return :(
$(Expr(:meta, :inline));
ccall($ff, llvmcall, T, (T, T), x, y)
)
# Names of the two-argument integer intrinsics: signed/unsigned saturating
# add and subtract. The underscores are turned into dots by `llvm_name`
# (e.g. `sadd_sat` becomes "llvm.sadd.sat.<suffix>").
const BINARY_INTRINSICS_INT = [
:sadd_sat
:uadd_sat
:ssub_sat
:usub_sat
]

# For every intrinsic name in each list, define a generated two-argument method
# `f(x::T, y::T)` restricted to `T <: LT{<:c}`, where `c` is the element-type
# constraint paired with that list (floating-point or integer types).
for (fs, c) in zip([BINARY_INTRINSICS_FLOAT, BINARY_INTRINSICS_INT],
[FloatingTypes, IntegerTypes])
for f in fs
# The intrinsic name is computed from the concrete `T` when the method is
# generated; the returned expression is an inlined `llvmcall` to that name.
@eval @generated function $(f)(x::T, y::T) where T<:LT{<:$c}
ff = llvm_name($(QuoteNode(f)), T,)
return :(
$(Expr(:meta, :inline));
ccall($ff, llvmcall, T, (T, T), x, y)
)
end
end
end

Expand Down
111 changes: 58 additions & 53 deletions src/simdvec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -177,50 +177,54 @@ end
####################

const BINARY_OPS = [
(:+ , IntegerTypes , Intrinsics.add)
(:- , IntegerTypes , Intrinsics.sub)
(:* , IntegerTypes , Intrinsics.mul)
(:div , UIntTypes , Intrinsics.udiv)
(:div , IntTypes , Intrinsics.sdiv)
(:rem , UIntTypes , Intrinsics.urem)
(:rem , IntTypes , Intrinsics.srem)

(:+ , FloatingTypes , Intrinsics.fadd)
(:- , FloatingTypes , Intrinsics.fsub)
(:* , FloatingTypes , Intrinsics.fmul)
(:^ , FloatingTypes , Intrinsics.pow)
(:/ , FloatingTypes , Intrinsics.fdiv)
(:rem , FloatingTypes , Intrinsics.frem)
(:min , FloatingTypes , Intrinsics.minnum)
(:max , FloatingTypes , Intrinsics.maxnum)
(:copysign , FloatingTypes , Intrinsics.copysign)

(:~ , BIntegerTypes , Intrinsics.xor)
(:& , BIntegerTypes , Intrinsics.and)
(:| , BIntegerTypes , Intrinsics.or)
(:⊻ , BIntegerTypes , Intrinsics.xor)

(:(==) , BIntegerTypes , Intrinsics.icmp_eq)
(:(!=) , BIntegerTypes , Intrinsics.icmp_ne)
(:(>) , BIntTypes , Intrinsics.icmp_sgt)
(:(>=) , BIntTypes , Intrinsics.icmp_sge)
(:(<) , BIntTypes , Intrinsics.icmp_slt)
(:(<=) , BIntTypes , Intrinsics.icmp_sle)
(:(>) , UIntTypes , Intrinsics.icmp_ugt)
(:(>=) , UIntTypes , Intrinsics.icmp_uge)
(:(<) , UIntTypes , Intrinsics.icmp_ult)
(:(<=) , UIntTypes , Intrinsics.icmp_ule)

(:(==) , FloatingTypes , Intrinsics.fcmp_oeq)
(:(!=) , FloatingTypes , Intrinsics.fcmp_une)
(:(>) , FloatingTypes , Intrinsics.fcmp_ogt)
(:(>=) , FloatingTypes , Intrinsics.fcmp_oge)
(:(<) , FloatingTypes , Intrinsics.fcmp_olt)
(:(<=) , FloatingTypes , Intrinsics.fcmp_ole)
(:(Base.:+) , IntegerTypes , Intrinsics.add)
(:(Base.:-) , IntegerTypes , Intrinsics.sub)
(:(Base.:*) , IntegerTypes , Intrinsics.mul)
(:(Base.div) , UIntTypes , Intrinsics.udiv)
(:(Base.div) , IntTypes , Intrinsics.sdiv)
(:(Base.rem) , UIntTypes , Intrinsics.urem)
(:(Base.rem) , IntTypes , Intrinsics.srem)

(:(add_saturate) , IntTypes , Intrinsics.sadd_sat)
(:(add_saturate) , UIntTypes , Intrinsics.uadd_sat)
(:(sub_saturate) , IntTypes , Intrinsics.ssub_sat)
(:(sub_saturate) , UIntTypes , Intrinsics.usub_sat)

(:(Base.:+) , FloatingTypes , Intrinsics.fadd)
(:(Base.:-) , FloatingTypes , Intrinsics.fsub)
(:(Base.:*) , FloatingTypes , Intrinsics.fmul)
(:(Base.:^) , FloatingTypes , Intrinsics.pow)
(:(Base.:/) , FloatingTypes , Intrinsics.fdiv)
(:(Base.rem) , FloatingTypes , Intrinsics.frem)
(:(Base.min) , FloatingTypes , Intrinsics.minnum)
(:(Base.max) , FloatingTypes , Intrinsics.maxnum)
(:(Base.copysign) , FloatingTypes , Intrinsics.copysign)
(:(Base.:~) , BIntegerTypes , Intrinsics.xor)
(:(Base.:&) , BIntegerTypes , Intrinsics.and)
(:(Base.:|) , BIntegerTypes , Intrinsics.or)
(:(Base.:⊻) , BIntegerTypes , Intrinsics.xor)

(:(Base.:(==)) , BIntegerTypes , Intrinsics.icmp_eq)
(:(Base.:!=) , BIntegerTypes , Intrinsics.icmp_ne)
(:(Base.:>) , BIntTypes , Intrinsics.icmp_sgt)
(:(Base.:>=) , BIntTypes , Intrinsics.icmp_sge)
(:(Base.:<) , BIntTypes , Intrinsics.icmp_slt)
(:(Base.:<=) , BIntTypes , Intrinsics.icmp_sle)
(:(Base.:>) , UIntTypes , Intrinsics.icmp_ugt)
(:(Base.:>=) , UIntTypes , Intrinsics.icmp_uge)
(:(Base.:<) , UIntTypes , Intrinsics.icmp_ult)
(:(Base.:<=) , UIntTypes , Intrinsics.icmp_ule)

(:(Base.:(==)) , FloatingTypes , Intrinsics.fcmp_oeq)
(:(Base.:!=) , FloatingTypes , Intrinsics.fcmp_une)
(:(Base.:>) , FloatingTypes , Intrinsics.fcmp_ogt)
(:(Base.:>=) , FloatingTypes , Intrinsics.fcmp_oge)
(:(Base.:<) , FloatingTypes , Intrinsics.fcmp_olt)
(:(Base.:<=) , FloatingTypes , Intrinsics.fcmp_ole)
]

for (op, constraint, llvmop) in BINARY_OPS
@eval @inline function (Base.$op)(x::Vec{N, T}, y::Vec{N, T}) where {N, T <: $constraint}
@eval @inline function $op(x::Vec{N, T}, y::Vec{N, T}) where {N, T <: $constraint}
Vec($(llvmop)(x.data, y.data))
end
end
Expand Down Expand Up @@ -317,22 +321,23 @@ for v in (:<<, :>>, :>>>)
end
end


# Vectorize binary functions
for (op, constraint) in [BINARY_OPS;
(:flipsign , ScalarTypes)
(:copysign , ScalarTypes)
(:signbit , ScalarTypes)
(:min , IntegerTypes)
(:max , IntegerTypes)
(:<< , IntegerTypes)
(:>> , IntegerTypes)
(:>>> , IntegerTypes)
(:(Base.flipsign) , ScalarTypes)
(:(Base.copysign) , ScalarTypes)
(:(Base.signbit) , ScalarTypes)
(:(Base.min) , IntegerTypes)
(:(Base.max) , IntegerTypes)
(:(Base.:<<) , IntegerTypes)
(:(Base.:>>) , IntegerTypes)
(:(Base.:>>>) , IntegerTypes)
]
@eval @inline function (Base.$op)(x::T2, y::Vec{N, T}) where {N, T2<:ScalarTypes, T <: $constraint}
Base.$op(Vec{N, T}(x), y)
@eval @inline function $op(x::T2, y::Vec{N, T}) where {N, T2<:ScalarTypes, T <: $constraint}
$op(Vec{N, T}(x), y)
end
@eval @inline function (Base.$op)(x::Vec{N, T}, y::T2) where {N, T2 <:ScalarTypes, T <: $constraint}
Base.$op(x, Vec{N, T}(y))
@eval @inline function $op(x::Vec{N, T}, y::T2) where {N, T2 <:ScalarTypes, T <: $constraint}
$op(x, Vec{N, T}(y))
end
end

Expand Down
33 changes: 33 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,16 @@ llvm_ir(f, args) = sprint(code_llvm, f, Base.typesof(args...))
@test Tuple(V8I32(v8i32)^3) === v8i32.^3
end

@testset "saturation" begin
    # Unsigned lanes: results clamp at 255 on overflow and at 0 on underflow.
    unsigned_lanes = Vec{4, UInt8}(UInt8.((150, 250, 125, 0)))
    @test SIMD.add_saturate(unsigned_lanes, UInt8(50)) === Vec{4, UInt8}(UInt8.((200, 255, 175, 50)))
    @test SIMD.sub_saturate(unsigned_lanes, UInt8(100)) === Vec{4, UInt8}(UInt8.((50, 150, 25, 0)))

    # Signed lanes: results clamp at 127 on overflow and at -128 on underflow.
    signed_lanes = Vec{4, Int8}(Int8.((100, -100, 20, -20)))
    @test SIMD.add_saturate(signed_lanes, Int8(50)) === Vec{4, Int8}(Int8.((127, -50, 70, 30)))
    @test SIMD.sub_saturate(signed_lanes, Int8(50)) === Vec{4, Int8}(Int8.((50, -128, -30, -70)))
end

@testset "Floating point arithmetic functions" begin

global const v4f64b = map(x->Float64(x+1), v4f64)
Expand Down Expand Up @@ -632,6 +642,29 @@ llvm_ir(f, args) = sprint(code_llvm, f, Base.typesof(args...))
@test occursin(" fadd <4 x double>", ir)
# @test occursin(r"( shufflevector <4 x double>.*){2}"s, ir)
end

# isascii_simd(s::String) -> Bool
#
# Return `true` when every code unit of `s` is below 0x80, `false` otherwise.
# The string is scanned in 128-byte words, each read as four 32-lane `UInt8`
# vector loads; the remaining `sizeof(s) % 128` bytes are checked one code unit
# at a time.
function isascii_simd(s::String)
    len = sizeof(s)
    nwords = len >> 7  # number of complete 128-byte words
    _0x80 = Vec{32, UInt8}(0x80)
    p = pointer(s)
    i = 0  # byte offset of the next vector load
    GC.@preserve s for _ in 1:nwords
        comp = Vec{32, UInt8}(0x00)
        for _ in 1:4
            v = SIMD.vload(LVec{32, UInt8}, p + i)
            # Accumulate the high bits with OR, not `+`: UInt8 addition wraps, so
            # two loads flagging the same lane (0x80 + 0x80) would cancel to 0x00
            # and a non-ASCII byte would go undetected.
            comp |= v & _0x80
            i += 32
        end
        reduce(|, comp) == 0x00 || return false
    end
    # Finish up the tail that does not fill a whole 128-byte word.
    for i = nwords*32*4+1:len
        @inbounds(codeunit(s, i)) >= 0x80 && return false
    end
    return true
end
end

@testset "Vector shuffles" begin
Expand Down

0 comments on commit 154f08c

Please sign in to comment.