diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 93c2cbc946bb5..9d8351be54e3e 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -815,7 +815,7 @@ static void buildArgTys(ASTContext &Context, CXXRecordDecl *KernelObj, assert(InitMethod && "sampler must have __init method"); // sampler __init method has only one parameter - // void __init(__spirv::OpTypeSampler *Sampler) + // void __init(__ocl_sampler_t *Sampler) auto *FuncDecl = cast(InitMethod); ParmVarDecl *SamplerArg = FuncDecl->getParamDecl(0); assert(SamplerArg && "sampler __init method must have sampler parameter"); @@ -918,7 +918,7 @@ static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name, assert(InitMethod && "sampler must have __init method"); // sampler __init method has only one argument - // void __init(__spirv::OpTypeSampler *Sampler) + // void __init(__ocl_sampler_t *Sampler) auto *FuncDecl = cast(InitMethod); ParmVarDecl *SamplerArg = FuncDecl->getParamDecl(0); assert(SamplerArg && "sampler __init method must have sampler parameter"); diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp index 6278c83e18c2a..dd6c4cb1073a1 100644 --- a/sycl/include/CL/__spirv/spirv_ops.hpp +++ b/sycl/include/CL/__spirv/spirv_ops.hpp @@ -12,52 +12,59 @@ #include #include -namespace cl { -namespace __spirv { - #ifdef __SYCL_DEVICE_ONLY__ - template -extern OpTypeEvent * -OpGroupAsyncCopy(int32_t Scope, __local dataT *Dest, __global dataT *Src, - size_t NumElements, size_t Stride, OpTypeEvent *E) noexcept; +extern __ocl_event_t __spirv_GroupAsyncCopy(int32_t Scope, __local dataT *Dest, + __global dataT *Src, + size_t NumElements, size_t Stride, + __ocl_event_t E) noexcept; template -extern OpTypeEvent * -OpGroupAsyncCopy(int32_t Scope, __global dataT *Dest, __local dataT *Src, - size_t NumElements, size_t Stride, OpTypeEvent *E) noexcept; +extern __ocl_event_t __spirv_GroupAsyncCopy(int32_t Scope, __global dataT *Dest, + __local dataT *Src, + size_t NumElements, size_t Stride, + __ocl_event_t E) noexcept; -#define OpGroupAsyncCopyGlobalToLocal OpGroupAsyncCopy -#define OpGroupAsyncCopyLocalToGlobal OpGroupAsyncCopy +#define OpGroupAsyncCopyGlobalToLocal __spirv_GroupAsyncCopy +#define OpGroupAsyncCopyLocalToGlobal __spirv_GroupAsyncCopy // Atomic SPIR-V builtins #define __SPIRV_ATOMIC_LOAD(AS, Type) \ - extern Type OpAtomicLoad(AS const Type *P, Scope S, MemorySemantics O); + extern Type __spirv_AtomicLoad(AS const Type *P, Scope S, MemorySemantics O); #define __SPIRV_ATOMIC_STORE(AS, Type) \ - extern void OpAtomicStore(AS Type *P, Scope S, MemorySemantics O, Type V); + extern void __spirv_AtomicStore(AS Type *P, Scope S, MemorySemantics O, \ + Type V); #define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \ - extern Type OpAtomicExchange(AS Type *P, Scope S, MemorySemantics O, Type V); + extern Type __spirv_AtomicExchange(AS Type *P, Scope S, MemorySemantics O, \ + Type V); #define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \ - extern Type OpAtomicCompareExchange(AS Type *P, Scope S, MemorySemantics E, \ - MemorySemantics U, Type V, Type C); + extern Type __spirv_AtomicCompareExchange( \ + AS Type *P, Scope S, MemorySemantics E, MemorySemantics U, Type V, \ + Type C); #define __SPIRV_ATOMIC_IADD(AS, Type) \ - extern Type OpAtomicIAdd(AS Type *P, Scope S, MemorySemantics O, Type V); + extern Type __spirv_AtomicIAdd(AS Type *P, Scope S, MemorySemantics O, \ + Type V); #define __SPIRV_ATOMIC_ISUB(AS, Type) \ - extern Type OpAtomicISub(AS Type *P, Scope S, MemorySemantics O, Type V); + extern Type __spirv_AtomicISub(AS Type *P, Scope S, MemorySemantics O, \ + Type V); #define __SPIRV_ATOMIC_SMIN(AS, Type) \ - extern Type OpAtomicSMin(AS Type *P, Scope S, MemorySemantics O, Type V); + extern Type __spirv_AtomicSMin(AS Type *P, Scope S, MemorySemantics O, \ + Type V); #define __SPIRV_ATOMIC_UMIN(AS, Type) \ - extern Type OpAtomicUMin(AS Type *P, Scope S, MemorySemantics O, Type V); + extern Type __spirv_AtomicUMin(AS Type *P, Scope S, MemorySemantics O, \ + Type V); #define __SPIRV_ATOMIC_SMAX(AS, Type) \ - extern Type OpAtomicSMax(AS Type *P, Scope S, MemorySemantics O, Type V); + extern Type __spirv_AtomicSMax(AS Type *P, Scope S, MemorySemantics O, \ + Type V); #define __SPIRV_ATOMIC_UMAX(AS, Type) \ - extern Type OpAtomicUMax(AS Type *P, Scope S, MemorySemantics O, Type V); + extern Type __spirv_AtomicUMax(AS Type *P, Scope S, MemorySemantics O, \ + Type V); #define __SPIRV_ATOMIC_AND(AS, Type) \ - extern Type OpAtomicAnd(AS Type *P, Scope S, MemorySemantics O, Type V); + extern Type __spirv_AtomicAnd(AS Type *P, Scope S, MemorySemantics O, Type V); #define __SPIRV_ATOMIC_OR(AS, Type) \ - extern Type OpAtomicOr(AS Type *P, Scope S, MemorySemantics O, Type V); + extern Type __spirv_AtomicOr(AS Type *P, Scope S, MemorySemantics O, Type V); #define __SPIRV_ATOMIC_XOR(AS, Type) \ - extern Type OpAtomicXor(AS Type *P, Scope S, MemorySemantics O, Type V); + extern Type __spirv_AtomicXor(AS Type *P, Scope S, MemorySemantics O, Type V); #define __SPIRV_ATOMIC_FLOAT(AS, Type) \ __SPIRV_ATOMIC_LOAD(AS, Type) \ @@ -87,14 +94,16 @@ OpGroupAsyncCopy(int32_t Scope, __global dataT *Dest, __local dataT *Src, // of atomic min/max based on the signed-ness of the type #define __SPIRV_ATOMIC_MINMAX(AS, Op) \ template \ - typename std::enable_if::value, T>::type OpAtomic##Op( \ - AS T *Ptr, Scope Scope, MemorySemantics Semantics, T Value) { \ - return OpAtomicS##Op(Ptr, Scope, Semantics, Value); \ + typename std::enable_if::value, T>::type \ + __spirv_Atomic##Op(AS T *Ptr, Scope Scope, MemorySemantics Semantics, \ + T Value) { \ + return __spirv_AtomicS##Op(Ptr, Scope, Semantics, Value); \ } \ template \ - typename std::enable_if::value, T>::type OpAtomic##Op( \ - AS T *Ptr, Scope Scope, MemorySemantics Semantics, T Value) { \ - return OpAtomicU##Op(Ptr, Scope, Semantics, Value); \ + typename std::enable_if::value, T>::type \ + __spirv_Atomic##Op(AS T *Ptr, Scope Scope, MemorySemantics Semantics, \ + T Value) { \ + return __spirv_AtomicU##Op(Ptr, Scope, Semantics, Value); \ } #define __SPIRV_ATOMICS(macro, Arg) macro(__global, Arg) macro(__local, Arg) @@ -109,64 +118,69 @@ __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long) __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min) __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max) -extern bool OpGroupAll(int32_t Scope, bool Predicate) noexcept; +extern bool __spirv_GroupAll(int32_t Scope, bool Predicate) noexcept; -extern bool OpGroupAny(int32_t Scope, bool Predicate) noexcept; +extern bool __spirv_GroupAny(int32_t Scope, bool Predicate) noexcept; template -extern dataT OpGroupBroadcast(int32_t Scope, dataT Value, - uint32_t LocalId) noexcept; +extern dataT __spirv_GroupBroadcast(int32_t Scope, dataT Value, + uint32_t LocalId) noexcept; template -extern dataT OpGroupIAdd(int32_t Scope, int32_t Op, dataT Value) noexcept; +extern dataT __spirv_GroupIAdd(int32_t Scope, int32_t Op, dataT Value) noexcept; template -extern dataT OpGroupFAdd(int32_t Scope, int32_t Op, dataT Value) noexcept; +extern dataT __spirv_GroupFAdd(int32_t Scope, int32_t Op, dataT Value) noexcept; template -extern dataT OpGroupUMin(int32_t Scope, int32_t Op, dataT Value) noexcept; +extern dataT __spirv_GroupUMin(int32_t Scope, int32_t Op, dataT Value) noexcept; template -extern dataT OpGroupSMin(int32_t Scope, int32_t Op, dataT Value) noexcept; +extern dataT __spirv_GroupSMin(int32_t Scope, int32_t Op, dataT Value) noexcept; template -extern dataT OpGroupFMin(int32_t Scope, int32_t Op, dataT Value) noexcept; +extern dataT __spirv_GroupFMin(int32_t Scope, int32_t Op, dataT Value) noexcept; template -extern dataT OpGroupUMax(int32_t Scope, int32_t Op, dataT Value) noexcept; +extern dataT __spirv_GroupUMax(int32_t Scope, int32_t Op, dataT Value) noexcept; template -extern dataT OpGroupSMax(int32_t Scope, int32_t Op, dataT Value) noexcept; +extern dataT __spirv_GroupSMax(int32_t Scope, int32_t Op, dataT Value) noexcept; template -extern dataT OpGroupFMax(int32_t Scope, int32_t Op, dataT Value) noexcept; +extern dataT __spirv_GroupFMax(int32_t Scope, int32_t Op, dataT Value) noexcept; template -extern dataT OpSubgroupShuffleINTEL(dataT Data, uint32_t InvocationId) noexcept; +extern dataT __spirv_SubgroupShuffleINTEL(dataT Data, + uint32_t InvocationId) noexcept; template -extern dataT OpSubgroupShuffleDownINTEL(dataT Current, dataT Next, - uint32_t Delta) noexcept; +extern dataT __spirv_SubgroupShuffleDownINTEL(dataT Current, dataT Next, + uint32_t Delta) noexcept; template -extern dataT OpSubgroupShuffleUpINTEL(dataT Previous, dataT Current, - uint32_t Delta) noexcept; +extern dataT __spirv_SubgroupShuffleUpINTEL(dataT Previous, dataT Current, + uint32_t Delta) noexcept; template -extern dataT OpSubgroupShuffleXorINTEL(dataT Data, uint32_t Value) noexcept; +extern dataT __spirv_SubgroupShuffleXorINTEL(dataT Data, + uint32_t Value) noexcept; template -extern dataT OpSubgroupBlockReadINTEL(const __global uint16_t *Ptr) noexcept; +extern dataT +__spirv_SubgroupBlockReadINTEL(const __global uint16_t *Ptr) noexcept; template -extern void OpSubgroupBlockWriteINTEL(__global uint16_t *Ptr, - dataT Data) noexcept; +extern void __spirv_SubgroupBlockWriteINTEL(__global uint16_t *Ptr, + dataT Data) noexcept; template -extern dataT OpSubgroupBlockReadINTEL(const __global uint32_t *Ptr) noexcept; +extern dataT +__spirv_SubgroupBlockReadINTEL(const __global uint32_t *Ptr) noexcept; template -extern void OpSubgroupBlockWriteINTEL(__global uint32_t *Ptr, - dataT Data) noexcept; +extern void __spirv_SubgroupBlockWriteINTEL(__global uint32_t *Ptr, + dataT Data) noexcept; -extern void prefetch(const __global char *Ptr, size_t NumBytes) noexcept; +extern void __spirv_ocl_prefetch(const __global char *Ptr, + size_t NumBytes) noexcept; #else // if !__SYCL_DEVICE_ONLY__ template -extern OpTypeEvent * +extern __ocl_event_t OpGroupAsyncCopyGlobalToLocal(int32_t Scope, dataT *Dest, dataT *Src, size_t NumElements, size_t Stride, - OpTypeEvent *E) noexcept { + __ocl_event_t E) noexcept { for (int i = 0; i < NumElements; i++) { Dest[i] = Src[i * Stride]; } @@ -175,10 +189,10 @@ OpGroupAsyncCopyGlobalToLocal(int32_t Scope, dataT *Dest, dataT *Src, } template -extern OpTypeEvent * +extern __ocl_event_t OpGroupAsyncCopyLocalToGlobal(int32_t Scope, dataT *Dest, dataT *Src, size_t NumElements, size_t Stride, - OpTypeEvent *E) noexcept { + __ocl_event_t E) noexcept { for (int i = 0; i < NumElements; i++) { Dest[i * Stride] = Src[i]; } @@ -186,17 +200,15 @@ OpGroupAsyncCopyLocalToGlobal(int32_t Scope, dataT *Dest, dataT *Src, return nullptr; } -extern void prefetch(const char *Ptr, size_t NumBytes) noexcept; +extern void __spirv_ocl_prefetch(const char *Ptr, size_t NumBytes) noexcept; #endif // !__SYCL_DEVICE_ONLY__ -extern void OpControlBarrier(Scope Execution, Scope Memory, - uint32_t Semantics) noexcept; +extern void __spirv_ControlBarrier(Scope Execution, Scope Memory, + uint32_t Semantics) noexcept; -extern void OpMemoryBarrier(Scope Memory, uint32_t Semantics) noexcept; +extern void __spirv_MemoryBarrier(Scope Memory, uint32_t Semantics) noexcept; -extern void OpGroupWaitEvents(int32_t Scope, uint32_t NumEvents, - OpTypeEvent ** WaitEvents) noexcept; +extern void __spirv_GroupWaitEvents(int32_t Scope, uint32_t NumEvents, + __ocl_event_t *WaitEvents) noexcept; -} // namespace __spirv -} // namespace cl diff --git a/sycl/include/CL/__spirv/spirv_types.hpp b/sycl/include/CL/__spirv/spirv_types.hpp index c7dbd2b19ce46..a9b6a606d0d19 100644 --- a/sycl/include/CL/__spirv/spirv_types.hpp +++ b/sycl/include/CL/__spirv/spirv_types.hpp @@ -10,11 +10,9 @@ #include -namespace cl { -namespace __spirv { - // TODO: include the header file with SPIR-V declarations from SPIRV-Headers // project. + enum Scope { CrossDevice = 0, Device = 1, @@ -23,6 +21,7 @@ enum Scope { Invocation = 4, }; + enum MemorySemantics { None = 0x0, Acquire = 0x2, @@ -40,12 +39,10 @@ enum MemorySemantics { // This class does not have definition, it is only predeclared here. // The pointers to this class objects can be passed to or returned from // SPIRV built-in functions. -// Only in such cases the class is recognized as SPIRV type OpTypeEvent. -class OpTypeEvent; - -// SPIRV type for sampler class -class OpTypeSampler; +// Only in such cases the class is recognized as SPIRV type __ocl_event_t. +#ifndef __SYCL_DEVICE_ONLY__ +typedef void* __ocl_event_t; +typedef void* __ocl_sampler_t; +#endif enum GroupOperation { Reduce = 0, InclusiveScan = 1, ExclusiveScan = 2 }; -} // namespace __spirv -} // namespace cl diff --git a/sycl/include/CL/__spirv/spirv_vars.hpp b/sycl/include/CL/__spirv/spirv_vars.hpp index 7e07f4ea978b1..74670fa0fb557 100644 --- a/sycl/include/CL/__spirv/spirv_vars.hpp +++ b/sycl/include/CL/__spirv/spirv_vars.hpp @@ -10,22 +10,19 @@ #ifdef __SYCL_DEVICE_ONLY__ -namespace cl { -namespace __spirv { typedef size_t size_t_vec __attribute__((ext_vector_type(3))); - -extern const __constant size_t_vec VarGlobalSize; -extern const __constant size_t_vec VarGlobalInvocationId; -extern const __constant size_t_vec VarWorkgroupSize; -extern const __constant size_t_vec VarLocalInvocationId; -extern const __constant size_t_vec VarWorkgroupId; -extern const __constant size_t_vec VarGlobalOffset; +extern "C" const __constant size_t_vec __spirv_BuiltInGlobalSize; +extern "C" const __constant size_t_vec __spirv_BuiltInGlobalInvocationId; +extern "C" const __constant size_t_vec __spirv_BuiltInWorkgroupSize; +extern "C" const __constant size_t_vec __spirv_BuiltInLocalInvocationId; +extern "C" const __constant size_t_vec __spirv_BuiltInWorkgroupId; +extern "C" const __constant size_t_vec __spirv_BuiltInGlobalOffset; #define DEFINE_INT_ID_TO_XYZ_CONVERTER(POSTFIX) \ template static size_t get##POSTFIX(); \ - template <> size_t get##POSTFIX<0>() { return Var##POSTFIX.x; } \ - template <> size_t get##POSTFIX<1>() { return Var##POSTFIX.y; } \ - template <> size_t get##POSTFIX<2>() { return Var##POSTFIX.z; } + template <> size_t get##POSTFIX<0>() { return __spirv_BuiltIn##POSTFIX.x; } \ + template <> size_t get##POSTFIX<1>() { return __spirv_BuiltIn##POSTFIX.y; } \ + template <> size_t get##POSTFIX<2>() { return __spirv_BuiltIn##POSTFIX.z; } DEFINE_INT_ID_TO_XYZ_CONVERTER(GlobalSize); DEFINE_INT_ID_TO_XYZ_CONVERTER(GlobalInvocationId) @@ -36,13 +33,11 @@ DEFINE_INT_ID_TO_XYZ_CONVERTER(GlobalOffset) #undef DEFINE_INT_ID_TO_XYZ_CONVERTER -extern const __constant uint32_t VarSubgroupSize; -extern const __constant uint32_t VarSubgroupMaxSize; -extern const __constant uint32_t VarNumSubgroups; -extern const __constant uint32_t VarNumEnqueuedSubgroups; -extern const __constant uint32_t VarSubgroupId; -extern const __constant uint32_t VarSubgroupLocalInvocationId; +extern "C" const __constant uint32_t __spirv_BuiltInSubgroupSize; +extern "C" const __constant uint32_t __spirv_BuiltInSubgroupMaxSize; +extern "C" const __constant uint32_t __spirv_BuiltInNumSubgroups; +extern "C" const __constant uint32_t __spirv_BuiltInNumEnqueuedSubgroups; +extern "C" const __constant uint32_t __spirv_BuiltInSubgroupId; +extern "C" const __constant uint32_t __spirv_BuiltInSubgroupLocalInvocationId; -} // namespace __spirv -} // namespace cl #endif // __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/CL/sycl/atomic.hpp b/sycl/include/CL/sycl/atomic.hpp index 62e9bec2ce48f..4080e3172dc9b 100644 --- a/sycl/include/CL/sycl/atomic.hpp +++ b/sycl/include/CL/sycl/atomic.hpp @@ -51,16 +51,16 @@ template struct IsValidAtomicAddressSpace { // a SPIR-V memory scope template struct GetSpirvMemoryScope {}; template <> struct GetSpirvMemoryScope { - static constexpr auto scope = cl::__spirv::Scope::Device; + static constexpr auto scope = Scope::Device; }; template <> struct GetSpirvMemoryScope { - static constexpr auto scope = ::cl::__spirv::Scope::Workgroup; + static constexpr auto scope = Scope::Workgroup; }; // Translate the cl::sycl::memory_order to a SPIR-V builtin order -static inline ::cl::__spirv::MemorySemantics +static inline MemorySemantics getSpirvMemorySemantics(memory_order Order) { - return ::cl::__spirv::MemorySemantics::None; + return MemorySemantics::None; } } // namespace detail @@ -76,7 +76,7 @@ namespace detail { // into std::memory_order // Only relaxed memory semantics are supported currently static inline std::memory_order -getStdMemoryOrder(::cl::__spirv::MemorySemantics MS) { +getStdMemoryOrder(MemorySemantics MS) { return std::memory_order_relaxed; } static inline std::memory_order getStdMemoryOrder(::cl::sycl::memory_order MS) { @@ -84,52 +84,52 @@ static inline std::memory_order getStdMemoryOrder(::cl::sycl::memory_order MS) { } } // namespace detail } // namespace sycl +} // namespace cl // std::atomic version of atomic SPIR-V builtins -namespace __spirv { template -void OpAtomicStore(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { +void __spirv_AtomicStore(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { Ptr->store(V, ::cl::sycl::detail::getStdMemoryOrder(MS)); } template -T OpAtomicLoad(const std::atomic *Ptr, Scope S, MemorySemantics MS) { +T __spirv_AtomicLoad(const std::atomic *Ptr, Scope S, MemorySemantics MS) { return Ptr->load(::cl::sycl::detail::getStdMemoryOrder(MS)); } template -T OpAtomicExchange(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { +T __spirv_AtomicExchange(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { return Ptr->exchange(V, ::cl::sycl::detail::getStdMemoryOrder(MS)); } template -extern T OpAtomicIAdd(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { +extern T __spirv_AtomicIAdd(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { return Ptr->fetch_add(V, ::cl::sycl::detail::getStdMemoryOrder(MS)); } template -extern T OpAtomicISub(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { +extern T __spirv_AtomicISub(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { return Ptr->fetch_sub(V, ::cl::sycl::detail::getStdMemoryOrder(MS)); } template -extern T OpAtomicAnd(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { +extern T __spirv_AtomicAnd(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { return Ptr->fetch_and(V, ::cl::sycl::detail::getStdMemoryOrder(MS)); } template -extern T OpAtomicOr(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { +extern T __spirv_AtomicOr(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { return Ptr->fetch_or(V, ::cl::sycl::detail::getStdMemoryOrder(MS)); } template -extern T OpAtomicXor(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { +extern T __spirv_AtomicXor(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { return Ptr->fetch_xor(V, ::cl::sycl::detail::getStdMemoryOrder(MS)); } template -extern T OpAtomicMin(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { +extern T __spirv_AtomicMin(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { std::memory_order MemoryOrder = ::cl::sycl::detail::getStdMemoryOrder(MS); T Val = Ptr->load(MemoryOrder); while (V < Val) { @@ -141,7 +141,7 @@ extern T OpAtomicMin(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { } template -extern T OpAtomicMax(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { +extern T __spirv_AtomicMax(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { std::memory_order MemoryOrder = ::cl::sycl::detail::getStdMemoryOrder(MS); T Val = Ptr->load(MemoryOrder); while (V > Val) { @@ -152,8 +152,6 @@ extern T OpAtomicMax(std::atomic *Ptr, Scope S, MemorySemantics MS, T V) { return Val; } -} // namespace __spirv -} // namespace cl #endif // !defined(__SYCL_DEVICE_ONLY__) namespace cl { @@ -187,17 +185,17 @@ class atomic { } void store(T Operand, memory_order Order = memory_order::relaxed) { - ::cl::__spirv::OpAtomicStore( + __spirv_AtomicStore( Ptr, SpirvScope, detail::getSpirvMemorySemantics(Order), Operand); } T load(memory_order Order = memory_order::relaxed) const { - return ::cl::__spirv::OpAtomicLoad(Ptr, SpirvScope, + return __spirv_AtomicLoad(Ptr, SpirvScope, detail::getSpirvMemorySemantics(Order)); } T exchange(T Operand, memory_order Order = memory_order::relaxed) { - return ::cl::__spirv::OpAtomicExchange( + return __spirv_AtomicExchange( Ptr, SpirvScope, detail::getSpirvMemorySemantics(Order), Operand); } @@ -207,7 +205,7 @@ class atomic { memory_order FailOrder = memory_order::relaxed) { STATIC_ASSERT_NOT_FLOAT(T); #ifdef __SYCL_DEVICE_ONLY__ - T Value = ::cl::__spirv::OpAtomicCompareExchange( + T Value = __spirv_AtomicCompareExchange( Ptr, SpirvScope, detail::getSpirvMemorySemantics(SuccessOrder), detail::getSpirvMemorySemantics(FailOrder), Desired, Expected); return (Value == Expected); @@ -220,43 +218,43 @@ class atomic { T fetch_add(T Operand, memory_order Order = memory_order::relaxed) { STATIC_ASSERT_NOT_FLOAT(T); - return ::cl::__spirv::OpAtomicIAdd( + return __spirv_AtomicIAdd( Ptr, SpirvScope, detail::getSpirvMemorySemantics(Order), Operand); } T fetch_sub(T Operand, memory_order Order = memory_order::relaxed) { STATIC_ASSERT_NOT_FLOAT(T); - return ::cl::__spirv::OpAtomicISub( + return __spirv_AtomicISub( Ptr, SpirvScope, detail::getSpirvMemorySemantics(Order), Operand); } T fetch_and(T Operand, memory_order Order = memory_order::relaxed) { STATIC_ASSERT_NOT_FLOAT(T); - return ::cl::__spirv::OpAtomicAnd( + return __spirv_AtomicAnd( Ptr, SpirvScope, detail::getSpirvMemorySemantics(Order), Operand); } T fetch_or(T Operand, memory_order Order = memory_order::relaxed) { STATIC_ASSERT_NOT_FLOAT(T); - return ::cl::__spirv::OpAtomicOr( + return __spirv_AtomicOr( Ptr, SpirvScope, detail::getSpirvMemorySemantics(Order), Operand); } T fetch_xor(T Operand, memory_order Order = memory_order::relaxed) { STATIC_ASSERT_NOT_FLOAT(T); - return ::cl::__spirv::OpAtomicXor( + return __spirv_AtomicXor( Ptr, SpirvScope, detail::getSpirvMemorySemantics(Order), Operand); } T fetch_min(T Operand, memory_order Order = memory_order::relaxed) { STATIC_ASSERT_NOT_FLOAT(T); - return ::cl::__spirv::OpAtomicMin( + return __spirv_AtomicMin( Ptr, SpirvScope, detail::getSpirvMemorySemantics(Order), Operand); } T fetch_max(T Operand, memory_order Order = memory_order::relaxed) { STATIC_ASSERT_NOT_FLOAT(T); - return ::cl::__spirv::OpAtomicMax( + return __spirv_AtomicMax( Ptr, SpirvScope, detail::getSpirvMemorySemantics(Order), Operand); } diff --git a/sycl/include/CL/sycl/builtins.hpp b/sycl/include/CL/sycl/builtins.hpp index 3785e57751fb8..e159fb372118a 100644 --- a/sycl/include/CL/sycl/builtins.hpp +++ b/sycl/include/CL/sycl/builtins.hpp @@ -20,7 +20,7 @@ namespace cl { namespace sycl { #ifdef __SYCL_DEVICE_ONLY__ -namespace __sycl_std = cl::__spirv; +#define __sycl_std #else namespace __sycl_std = __host_std; #endif @@ -1020,7 +1020,7 @@ cross(T p0, T p1) __NOEXC { template typename std::enable_if::value, T>::type dot(T p0, T p1) __NOEXC { - return __sycl_std::__invoke_OpFMul(p0, p1); + return __sycl_std::__invoke_FMul(p0, p1); } // float dot (vgengeofloat p0, vgengeofloat p1) @@ -1028,7 +1028,7 @@ template typename std::enable_if::value, cl::sycl::cl_float>::type dot(T p0, T p1) __NOEXC { - return __sycl_std::__invoke_OpDot(p0, p1); + return __sycl_std::__invoke_Dot(p0, p1); } // double dot (vgengeodouble p0, vgengeodouble p1) @@ -1036,7 +1036,7 @@ template typename std::enable_if::value, cl::sycl::cl_double>::type dot(T p0, T p1) __NOEXC { - return __sycl_std::__invoke_OpDot(p0, p1); + return __sycl_std::__invoke_Dot(p0, p1); } // half dot (vgengeohalf p0, vgengeohalf p1) @@ -1044,7 +1044,7 @@ template typename std::enable_if::value, cl::sycl::cl_half>::type dot(T p0, T p1) __NOEXC { - return __sycl_std::__invoke_OpDot(p0, p1); + return __sycl_std::__invoke_Dot(p0, p1); } // float distance (gengeofloat p0, gengeofloat p1) @@ -1162,7 +1162,7 @@ template ::value, T>::type> detail::common_rel_ret_t isequal(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdEqual>(x, y)); + __sycl_std::__invoke_FOrdEqual>(x, y)); } // int isnotequal (half x, half y) @@ -1174,7 +1174,7 @@ template ::value, T>::type> detail::common_rel_ret_t isnotequal(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFUnordNotEqual>(x, y)); + __sycl_std::__invoke_FUnordNotEqual>(x, y)); } // int isgreater (half x, half y) @@ -1186,7 +1186,7 @@ template ::value, T>::type> detail::common_rel_ret_t isgreater(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdGreaterThan>(x, y)); + __sycl_std::__invoke_FOrdGreaterThan>(x, y)); } // int isgreaterequal (half x, half y) @@ -1198,7 +1198,7 @@ template ::value, T>::type> detail::common_rel_ret_t isgreaterequal(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdGreaterThanEqual>(x, y)); + __sycl_std::__invoke_FOrdGreaterThanEqual>(x, y)); } // int isless (half x, half y) @@ -1210,7 +1210,7 @@ template ::value, T>::type> detail::common_rel_ret_t isless(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdLessThan>(x, y)); + __sycl_std::__invoke_FOrdLessThan>(x, y)); } // int islessequal (half x, half y) @@ -1222,7 +1222,7 @@ template ::value, T>::type> detail::common_rel_ret_t islessequal(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpFOrdLessThanEqual>(x, y)); + __sycl_std::__invoke_FOrdLessThanEqual>(x, y)); } // int islessgreater (half x, half y) @@ -1234,7 +1234,7 @@ template ::value, T>::type> detail::common_rel_ret_t islessgreater(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpLessOrGreater>(x, y)); + __sycl_std::__invoke_LessOrGreater>(x, y)); } // int isfinite (half x) @@ -1246,7 +1246,7 @@ template ::value, T>::type> detail::common_rel_ret_t isfinite(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpIsFinite>(x)); + __sycl_std::__invoke_IsFinite>(x)); } // int isinf (half x) @@ -1258,7 +1258,7 @@ template ::value, T>::type> detail::common_rel_ret_t isinf(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpIsInf>(x)); + __sycl_std::__invoke_IsInf>(x)); } // int isnan (half x) @@ -1270,7 +1270,7 @@ template ::value, T>::type> detail::common_rel_ret_t isnan(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpIsNan>(x)); + __sycl_std::__invoke_IsNan>(x)); } // int isnormal (half x) @@ -1282,7 +1282,7 @@ template ::value, T>::type> detail::common_rel_ret_t isnormal(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpIsNormal>(x)); + __sycl_std::__invoke_IsNormal>(x)); } // int isordered (half x) @@ -1294,7 +1294,7 @@ template ::value, T>::type> detail::common_rel_ret_t isordered(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpOrdered>(x, y)); + __sycl_std::__invoke_Ordered>(x, y)); } // int isunordered (half x, half y) @@ -1306,7 +1306,7 @@ template ::value, T>::type> detail::common_rel_ret_t isunordered(T x, T y) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpUnordered>(x, y)); + __sycl_std::__invoke_Unordered>(x, y)); } // int signbit (half x) @@ -1318,7 +1318,7 @@ template ::value, T>::type> detail::common_rel_ret_t signbit(T x) __NOEXC { return detail::RelConverter::apply( - __sycl_std::__invoke_OpSignBitSet>(x)); + __sycl_std::__invoke_SignBitSet>(x)); } // int any (sigeninteger x) @@ -1335,7 +1335,7 @@ typename std::enable_if::value, cl::sycl::cl_int>::type any(T x) __NOEXC { return detail::rel_sign_bit_test_ret_t( - __sycl_std::__invoke_OpAny>( + __sycl_std::__invoke_Any>( detail::rel_sign_bit_test_arg_t(x))); } @@ -1353,7 +1353,7 @@ typename std::enable_if::value, cl::sycl::cl_int>::type all(T x) __NOEXC { return detail::rel_sign_bit_test_ret_t( - __sycl_std::__invoke_OpAll>( + __sycl_std::__invoke_All>( detail::rel_sign_bit_test_arg_t(x))); } @@ -1370,7 +1370,7 @@ typename std::enable_if::value && detail::is_igeninteger::value, T>::type select(T a, T b, T2 c) __NOEXC { - return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); + return __sycl_std::__invoke_Select(detail::select_arg_c_t(c), b, a); } // geninteger select (geninteger a, geninteger b, ugeninteger c) @@ -1379,7 +1379,7 @@ typename std::enable_if::value && detail::is_ugeninteger::value, T>::type select(T a, T b, T2 c) __NOEXC { - return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); + return __sycl_std::__invoke_Select(detail::select_arg_c_t(c), b, a); } // genfloatf select (genfloatf a, genfloatf b, genint c) @@ -1387,7 +1387,7 @@ template typename std::enable_if< detail::is_genfloatf::value && detail::is_genint::value, T>::type select(T a, T b, T2 c) __NOEXC { - return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); + return __sycl_std::__invoke_Select(detail::select_arg_c_t(c), b, a); } // genfloatf select (genfloatf a, genfloatf b, ugenint c) @@ -1395,7 +1395,7 @@ template typename std::enable_if< detail::is_genfloatf::value && detail::is_ugenint::value, T>::type select(T a, T b, T2 c) __NOEXC { - return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); + return __sycl_std::__invoke_Select(detail::select_arg_c_t(c), b, a); } // genfloatd select (genfloatd a, genfloatd b, igeninteger64 c) @@ -1404,7 +1404,7 @@ typename std::enable_if::value && detail::is_igeninteger64bit::value, T>::type select(T a, T b, T2 c) __NOEXC { - return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); + return __sycl_std::__invoke_Select(detail::select_arg_c_t(c), b, a); } // genfloatd select (genfloatd a, genfloatd b, ugeninteger64 c) @@ -1413,7 +1413,7 @@ typename std::enable_if::value && detail::is_ugeninteger64bit::value, T>::type select(T a, T b, T2 c) __NOEXC { - return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); + return __sycl_std::__invoke_Select(detail::select_arg_c_t(c), b, a); } // genfloath select (genfloath a, genfloath b, igeninteger16 c) @@ -1422,7 +1422,7 @@ typename std::enable_if::value && detail::is_igeninteger16bit::value, T>::type select(T a, T b, T2 c) __NOEXC { - return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); + return __sycl_std::__invoke_Select(detail::select_arg_c_t(c), b, a); } // genfloath select (genfloath a, genfloath b, ugeninteger16 c) @@ -1431,7 +1431,7 @@ typename std::enable_if::value && detail::is_ugeninteger16bit::value, T>::type select(T a, T b, T2 c) __NOEXC { - return __sycl_std::__invoke_OpSelect(detail::select_arg_c_t(c), b, a); + return __sycl_std::__invoke_Select(detail::select_arg_c_t(c), b, a); } namespace native { diff --git a/sycl/include/CL/sycl/detail/builtins.hpp b/sycl/include/CL/sycl/detail/builtins.hpp index 4300e74d564c6..5e5b8bacdbd99 100644 --- a/sycl/include/CL/sycl/detail/builtins.hpp +++ b/sycl/include/CL/sycl/detail/builtins.hpp @@ -16,221 +16,239 @@ // TODO Decide whether to mark functions with this attribute. #define __NOEXC /*noexcept*/ -#define MAKE_CALL_ARG1(call) \ +#ifdef __SYCL_DEVICE_ONLY__ +#define __FUNC_PREFIX_OCL __spirv_ocl_ +#define __FUNC_PREFIX_CORE __spirv_ +#else +#define __FUNC_PREFIX_OCL +#define __FUNC_PREFIX_CORE +#endif + +#define PPCAT_NX(A, B) A ## B +#define PPCAT(A, B) PPCAT_NX(A, B) + +#define MAKE_CALL_ARG1(call, prefix) \ template \ - ALWAYS_INLINE typename cl::sycl::detail::ConvertToOpenCLType::type \ - __invoke_##call(T1 t1) __NOEXC { \ + ALWAYS_INLINE \ + typename cl::sycl::detail::ConvertToOpenCLType::type __invoke_##call( \ + T1 t1) __NOEXC { \ using Ret = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg1 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ - extern Ret call(Arg1); \ - return call(cl::sycl::detail::TryToGetPointer(t1)); \ + extern Ret PPCAT(prefix, call)(Arg1); \ + return PPCAT(prefix, call)(cl::sycl::detail::TryToGetPointer(t1)); \ } -#define MAKE_CALL_ARG2(call) \ +#define MAKE_CALL_ARG2(call, prefix) \ template \ - ALWAYS_INLINE typename cl::sycl::detail::ConvertToOpenCLType::type \ - __invoke_##call(T1 t1, T2 t2) __NOEXC { \ + ALWAYS_INLINE \ + typename cl::sycl::detail::ConvertToOpenCLType::type __invoke_##call( \ + T1 t1, T2 t2) __NOEXC { \ using Ret = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg1 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg2 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ - extern Ret call(Arg1, Arg2); \ - return call(cl::sycl::detail::TryToGetPointer(t1), \ - cl::sycl::detail::TryToGetPointer(t2)); \ + extern Ret PPCAT(prefix, call)(Arg1, Arg2); \ + return PPCAT(prefix, call)(cl::sycl::detail::TryToGetPointer(t1), \ + cl::sycl::detail::TryToGetPointer(t2)); \ } -#define MAKE_CALL_ARG3(call) \ +#define MAKE_CALL_ARG3(call, prefix) \ template \ - ALWAYS_INLINE typename cl::sycl::detail::ConvertToOpenCLType::type \ - __invoke_##call(T1 t1, T2 t2, T3 t3) __NOEXC { \ + ALWAYS_INLINE \ + typename cl::sycl::detail::ConvertToOpenCLType::type __invoke_##call( \ + T1 t1, T2 t2, T3 t3) __NOEXC { \ using Ret = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg1 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg2 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ using Arg3 = typename cl::sycl::detail::ConvertToOpenCLType::type; \ - extern Ret call(Arg1, Arg2, Arg3); \ - return call(cl::sycl::detail::TryToGetPointer(t1), \ - cl::sycl::detail::TryToGetPointer(t2), \ - cl::sycl::detail::TryToGetPointer(t3)); \ + extern Ret PPCAT(prefix, call)(Arg1, Arg2, Arg3); \ + return PPCAT(prefix, call)(cl::sycl::detail::TryToGetPointer(t1), \ + cl::sycl::detail::TryToGetPointer(t2), \ + cl::sycl::detail::TryToGetPointer(t3)); \ } +#ifndef __SYCL_DEVICE_ONLY__ namespace cl { -#ifdef __SYCL_DEVICE_ONLY__ -namespace __spirv { -#else namespace __host_std { #endif // __SYCL_DEVICE_ONLY__ /* ----------------- 4.13.3 Math functions. ---------------------------------*/ -MAKE_CALL_ARG1(acos) -MAKE_CALL_ARG1(acosh) -MAKE_CALL_ARG1(acospi) -MAKE_CALL_ARG1(asin) -MAKE_CALL_ARG1(asinh) -MAKE_CALL_ARG1(asinpi) -MAKE_CALL_ARG1(atan) -MAKE_CALL_ARG2(atan2) -MAKE_CALL_ARG1(atanh) -MAKE_CALL_ARG1(atanpi) -MAKE_CALL_ARG2(atan2pi) -MAKE_CALL_ARG1(cbrt) -MAKE_CALL_ARG1(ceil) -MAKE_CALL_ARG2(copysign) -MAKE_CALL_ARG1(cos) -MAKE_CALL_ARG1(cosh) -MAKE_CALL_ARG1(cospi) -MAKE_CALL_ARG1(erfc) -MAKE_CALL_ARG1(erf) -MAKE_CALL_ARG1(exp) -MAKE_CALL_ARG1(exp2) -MAKE_CALL_ARG1(exp10) -MAKE_CALL_ARG1(expm1) -MAKE_CALL_ARG1(fabs) -MAKE_CALL_ARG2(fdim) -MAKE_CALL_ARG1(floor) -MAKE_CALL_ARG3(fma) -MAKE_CALL_ARG2(fmax) -MAKE_CALL_ARG2(fmin) -MAKE_CALL_ARG2(fmod) -MAKE_CALL_ARG2(fract) -MAKE_CALL_ARG2(frexp) -MAKE_CALL_ARG2(hypot) -MAKE_CALL_ARG1(ilogb) -MAKE_CALL_ARG2(ldexp) -MAKE_CALL_ARG1(lgamma) -MAKE_CALL_ARG2(lgamma_r) -MAKE_CALL_ARG1(log) -MAKE_CALL_ARG1(log2) -MAKE_CALL_ARG1(log10) -MAKE_CALL_ARG1(log1p) -MAKE_CALL_ARG1(logb) -MAKE_CALL_ARG3(mad) -MAKE_CALL_ARG2(maxmag) -MAKE_CALL_ARG2(minmag) -MAKE_CALL_ARG2(modf) -MAKE_CALL_ARG1(nan) -MAKE_CALL_ARG2(nextafter) -MAKE_CALL_ARG2(pow) -MAKE_CALL_ARG2(pown) -MAKE_CALL_ARG2(powr) -MAKE_CALL_ARG2(remainder) -MAKE_CALL_ARG3(remquo) -MAKE_CALL_ARG1(rint) -MAKE_CALL_ARG2(rootn) -MAKE_CALL_ARG1(round) -MAKE_CALL_ARG1(rsqrt) -MAKE_CALL_ARG1(sin) -MAKE_CALL_ARG2(sincos) -MAKE_CALL_ARG1(sinh) -MAKE_CALL_ARG1(sinpi) -MAKE_CALL_ARG1(sqrt) -MAKE_CALL_ARG1(tan) -MAKE_CALL_ARG1(tanh) -MAKE_CALL_ARG1(tanpi) -MAKE_CALL_ARG1(tgamma) -MAKE_CALL_ARG1(trunc) -MAKE_CALL_ARG1(native_cos) -MAKE_CALL_ARG2(native_divide) -MAKE_CALL_ARG1(native_exp) -MAKE_CALL_ARG1(native_exp2) -MAKE_CALL_ARG1(native_exp10) -MAKE_CALL_ARG1(native_log) -MAKE_CALL_ARG1(native_log2) -MAKE_CALL_ARG1(native_log10) -MAKE_CALL_ARG2(native_powr) -MAKE_CALL_ARG1(native_recip) -MAKE_CALL_ARG1(native_rsqrt) -MAKE_CALL_ARG1(native_sin) -MAKE_CALL_ARG1(native_sqrt) -MAKE_CALL_ARG1(native_tan) -MAKE_CALL_ARG1(half_cos) -MAKE_CALL_ARG2(half_divide) -MAKE_CALL_ARG1(half_exp) -MAKE_CALL_ARG1(half_exp2) -MAKE_CALL_ARG1(half_exp10) -MAKE_CALL_ARG1(half_log) -MAKE_CALL_ARG1(half_log2) -MAKE_CALL_ARG1(half_log10) -MAKE_CALL_ARG2(half_powr) -MAKE_CALL_ARG1(half_recip) -MAKE_CALL_ARG1(half_rsqrt) -MAKE_CALL_ARG1(half_sin) -MAKE_CALL_ARG1(half_sqrt) -MAKE_CALL_ARG1(half_tan) +MAKE_CALL_ARG1(acos, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(acosh, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(acospi, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(asin, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(asinh, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(asinpi, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(atan, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(atan2, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(atanh, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(atanpi, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(atan2pi, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(cbrt, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(ceil, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(copysign, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(cos, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(cosh, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(cospi, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(erfc, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(erf, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(exp, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(exp2, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(exp10, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(expm1, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(fabs, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(fdim, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(floor, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(fma, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(fmax, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(fmin, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(fmod, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(fract, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(frexp, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(hypot, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(ilogb, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(ldexp, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(lgamma, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(lgamma_r, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(log, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(log2, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(log10, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(log1p, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(logb, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(mad, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(maxmag, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(minmag, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(modf, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(nan, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(nextafter, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(pow, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(pown, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(powr, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(remainder, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(remquo, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(rint, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(rootn, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(round, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(rsqrt, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(sin, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(sincos, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(sinh, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(sinpi, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(sqrt, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(tan, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(tanh, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(tanpi, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(tgamma, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(trunc, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(native_cos, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(native_divide, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(native_exp, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(native_exp2, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(native_exp10, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(native_log, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(native_log2, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(native_log10, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(native_powr, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(native_recip, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(native_rsqrt, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(native_sin, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(native_sqrt, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(native_tan, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(half_cos, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(half_divide, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(half_exp, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(half_exp2, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(half_exp10, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(half_log, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(half_log2, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(half_log10, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(half_powr, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(half_recip, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(half_rsqrt, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(half_sin, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(half_sqrt, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(half_tan, __FUNC_PREFIX_OCL) /* --------------- 4.13.4 Integer functions. --------------------------------*/ -MAKE_CALL_ARG1(s_abs) -MAKE_CALL_ARG1(u_abs) -MAKE_CALL_ARG2(s_abs_diff) -MAKE_CALL_ARG2(u_abs_diff) -MAKE_CALL_ARG2(s_add_sat) -MAKE_CALL_ARG2(u_add_sat) -MAKE_CALL_ARG2(s_hadd) -MAKE_CALL_ARG2(u_hadd) -MAKE_CALL_ARG2(s_rhadd) -MAKE_CALL_ARG2(u_rhadd) -MAKE_CALL_ARG3(s_clamp) -MAKE_CALL_ARG3(u_clamp) -MAKE_CALL_ARG1(clz) -MAKE_CALL_ARG3(s_mad_hi) -MAKE_CALL_ARG3(u_mad_hi) -MAKE_CALL_ARG3(u_mad_sat) -MAKE_CALL_ARG3(s_mad_sat) -MAKE_CALL_ARG2(s_max) -MAKE_CALL_ARG2(u_max) -MAKE_CALL_ARG2(s_min) -MAKE_CALL_ARG2(u_min) -MAKE_CALL_ARG2(s_mul_hi) -MAKE_CALL_ARG2(u_mul_hi) -MAKE_CALL_ARG2(rotate) -MAKE_CALL_ARG2(s_sub_sat) -MAKE_CALL_ARG2(u_sub_sat) -MAKE_CALL_ARG2(u_upsample) -MAKE_CALL_ARG2(s_upsample) -MAKE_CALL_ARG1(popcount) -MAKE_CALL_ARG3(s_mad24) -MAKE_CALL_ARG3(u_mad24) -MAKE_CALL_ARG2(s_mul24) -MAKE_CALL_ARG2(u_mul24) +MAKE_CALL_ARG1(s_abs, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(u_abs, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(s_abs_diff, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(u_abs_diff, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(s_add_sat, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(u_add_sat, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(s_hadd, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(u_hadd, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(s_rhadd, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(u_rhadd, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(s_clamp, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(u_clamp, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(clz, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(s_mad_hi, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(u_mad_hi, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(u_mad_sat, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(s_mad_sat, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(s_max, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(u_max, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(s_min, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(u_min, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(s_mul_hi, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(u_mul_hi, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(rotate, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(s_sub_sat, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(u_sub_sat, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(u_upsample, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(s_upsample, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(popcount, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(s_mad24, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(u_mad24, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(s_mul24, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(u_mul24, __FUNC_PREFIX_OCL) /* --------------- 4.13.5 Common functions. ---------------------------------*/ -MAKE_CALL_ARG3(fclamp) -MAKE_CALL_ARG1(degrees) -MAKE_CALL_ARG2(fmax_common) -MAKE_CALL_ARG2(fmin_common) -MAKE_CALL_ARG3(mix) -MAKE_CALL_ARG1(radians) -MAKE_CALL_ARG2(step) -MAKE_CALL_ARG3(smoothstep) -MAKE_CALL_ARG1(sign) +MAKE_CALL_ARG3(fclamp, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(degrees, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(fmax_common, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(fmin_common, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(mix, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(radians, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(step, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(smoothstep, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(sign, __FUNC_PREFIX_OCL) /* --------------- 4.13.6 Geometric Functions. ------------------------------*/ -MAKE_CALL_ARG2(cross) -MAKE_CALL_ARG2(OpDot) // dot -MAKE_CALL_ARG2(OpFMul) // dot -MAKE_CALL_ARG2(distance) -MAKE_CALL_ARG1(length) -MAKE_CALL_ARG1(normalize) -MAKE_CALL_ARG2(fast_distance) -MAKE_CALL_ARG1(fast_length) -MAKE_CALL_ARG1(fast_normalize) +MAKE_CALL_ARG2(cross, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(Dot, __FUNC_PREFIX_CORE) // dot +MAKE_CALL_ARG2(FMul, __FUNC_PREFIX_CORE) // dot +MAKE_CALL_ARG2(distance, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(length, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(normalize, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG2(fast_distance, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(fast_length, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG1(fast_normalize, __FUNC_PREFIX_OCL) /* --------------- 4.13.7 Relational functions. -----------------------------*/ -MAKE_CALL_ARG2(OpFOrdEqual) // isequal -MAKE_CALL_ARG2(OpFUnordNotEqual) // isnotequal -MAKE_CALL_ARG2(OpFOrdGreaterThan) // isgreater -MAKE_CALL_ARG2(OpFOrdGreaterThanEqual) // isgreaterequal -MAKE_CALL_ARG2(OpFOrdLessThan) // isless -MAKE_CALL_ARG2(OpFOrdLessThanEqual) // islessequal -MAKE_CALL_ARG2(OpLessOrGreater) // islessgreater -MAKE_CALL_ARG1(OpIsFinite) // isfinite -MAKE_CALL_ARG1(OpIsInf) // isinf -MAKE_CALL_ARG1(OpIsNan) // isnan -MAKE_CALL_ARG1(OpIsNormal) // isnormal -MAKE_CALL_ARG2(OpOrdered) // isordered -MAKE_CALL_ARG2(OpUnordered) // isunordered -MAKE_CALL_ARG1(OpSignBitSet) // signbit -MAKE_CALL_ARG1(OpAny) // any -MAKE_CALL_ARG1(OpAll) // all -MAKE_CALL_ARG3(bitselect) -MAKE_CALL_ARG3(OpSelect) // select -} // namespace __spirv or __host_std +MAKE_CALL_ARG2(FOrdEqual, __FUNC_PREFIX_CORE) // isequal +MAKE_CALL_ARG2(FUnordNotEqual, __FUNC_PREFIX_CORE) // isnotequal +MAKE_CALL_ARG2(FOrdGreaterThan, __FUNC_PREFIX_CORE) // isgreater +MAKE_CALL_ARG2(FOrdGreaterThanEqual, __FUNC_PREFIX_CORE) // isgreaterequal +MAKE_CALL_ARG2(FOrdLessThan, __FUNC_PREFIX_CORE) // isless +MAKE_CALL_ARG2(FOrdLessThanEqual, __FUNC_PREFIX_CORE) // islessequal +MAKE_CALL_ARG2(LessOrGreater, __FUNC_PREFIX_CORE) // islessgreater +MAKE_CALL_ARG1(IsFinite, __FUNC_PREFIX_CORE) // isfinite +MAKE_CALL_ARG1(IsInf, __FUNC_PREFIX_CORE) // isinf +MAKE_CALL_ARG1(IsNan, __FUNC_PREFIX_CORE) // isnan +MAKE_CALL_ARG1(IsNormal, __FUNC_PREFIX_CORE) // isnormal +MAKE_CALL_ARG2(Ordered, __FUNC_PREFIX_CORE) // isordered +MAKE_CALL_ARG2(Unordered, __FUNC_PREFIX_CORE) // isunordered +MAKE_CALL_ARG1(SignBitSet, __FUNC_PREFIX_CORE) // signbit +MAKE_CALL_ARG1(Any, __FUNC_PREFIX_CORE) // any +MAKE_CALL_ARG1(All, __FUNC_PREFIX_CORE) // all +MAKE_CALL_ARG3(bitselect, __FUNC_PREFIX_OCL) +MAKE_CALL_ARG3(Select, __FUNC_PREFIX_CORE) // select +#ifndef __SYCL_DEVICE_ONLY__ +} // namespace __host_std } // namespace cl +#endif #undef __NOEXC #undef MAKE_CALL_ARG1 #undef MAKE_CALL_ARG2 #undef MAKE_CALL_ARG3 +#undef PPCAT_NX +#undef PPCAT +#undef __FUNC_PREFIX_OCL +#undef __FUNC_PREFIX_CORE diff --git a/sycl/include/CL/sycl/detail/sampler_impl.hpp b/sycl/include/CL/sycl/detail/sampler_impl.hpp index 43dea3451396b..f8f2d86afcf3f 100644 --- a/sycl/include/CL/sycl/detail/sampler_impl.hpp +++ b/sycl/include/CL/sycl/detail/sampler_impl.hpp @@ -24,8 +24,8 @@ namespace detail { class sampler_impl { public: #ifdef __SYCL_DEVICE_ONLY__ - __spirv::OpTypeSampler *m_Sampler; - sampler_impl(__spirv::OpTypeSampler *Sampler) : m_Sampler(Sampler) {} + __ocl_sampler_t m_Sampler; + sampler_impl(__ocl_sampler_t Sampler) : m_Sampler(Sampler) {} #else std::unordered_map m_contextToSampler; diff --git a/sycl/include/CL/sycl/device_event.hpp b/sycl/include/CL/sycl/device_event.hpp index 66217d89005dd..569bcb6ef4db6 100644 --- a/sycl/include/CL/sycl/device_event.hpp +++ b/sycl/include/CL/sycl/device_event.hpp @@ -16,7 +16,7 @@ namespace sycl { class device_event { private: - cl::__spirv::OpTypeEvent *m_Event; + __ocl_event_t *m_Event; public: device_event(const device_event &rhs) = default; @@ -24,11 +24,11 @@ class device_event { device_event &operator=(const device_event &rhs) = default; device_event &operator=(device_event &&rhs) = default; - device_event(cl::__spirv::OpTypeEvent *Event) : m_Event(Event) {} + device_event(__ocl_event_t *Event) : m_Event(Event) {} void wait() { - cl::__spirv::OpGroupWaitEvents(cl::__spirv::Scope::Workgroup, 1, - &m_Event); + __spirv_GroupWaitEvents(Scope::Workgroup, 1, + m_Event); } }; diff --git a/sycl/include/CL/sycl/group.hpp b/sycl/include/CL/sycl/group.hpp index dcd8a69b5ecdd..d5f7c7a683a8e 100644 --- a/sycl/include/CL/sycl/group.hpp +++ b/sycl/include/CL/sycl/group.hpp @@ -81,18 +81,18 @@ template class group { accessMode == access::mode::read_write, access::fence_space>::type accessSpace = access::fence_space::global_and_local) const { - uint32_t flags = ::cl::__spirv::MemorySemantics::SequentiallyConsistent; + uint32_t flags = MemorySemantics::SequentiallyConsistent; switch (accessSpace) { case access::fence_space::global_space: - flags |= cl::__spirv::MemorySemantics::CrossWorkgroupMemory; + flags |= MemorySemantics::CrossWorkgroupMemory; break; case access::fence_space::local_space: - flags |= cl::__spirv::MemorySemantics::WorkgroupMemory; + flags |= MemorySemantics::WorkgroupMemory; break; case access::fence_space::global_and_local: default: - flags |= cl::__spirv::MemorySemantics::CrossWorkgroupMemory | - cl::__spirv::MemorySemantics::WorkgroupMemory; + flags |= MemorySemantics::CrossWorkgroupMemory | + MemorySemantics::WorkgroupMemory; break; } // TODO: currently, there is no good way in SPIRV to set the memory @@ -103,29 +103,29 @@ template class group { // or if we decide that 'accessMode' is the important feature then // we can fix this later, for example, by using OpenCL 1.2 functions // read_mem_fence() and write_mem_fence(). - cl::__spirv::OpMemoryBarrier(cl::__spirv::Scope::Workgroup, flags); + __spirv_MemoryBarrier(Scope::Workgroup, flags); } template device_event async_work_group_copy(local_ptr dest, global_ptr src, size_t numElements) const { - cl::__spirv::OpTypeEvent *e = - cl::__spirv::OpGroupAsyncCopyGlobalToLocal( - cl::__spirv::Scope::Workgroup, + __ocl_event_t e = + OpGroupAsyncCopyGlobalToLocal( + Scope::Workgroup, dest.get(), src.get(), numElements, 1, 0); - return device_event(e); + return device_event(&e); } template device_event async_work_group_copy(global_ptr dest, local_ptr src, size_t numElements) const { - cl::__spirv::OpTypeEvent *e = - cl::__spirv::OpGroupAsyncCopyLocalToGlobal( - cl::__spirv::Scope::Workgroup, + __ocl_event_t e = + OpGroupAsyncCopyLocalToGlobal( + Scope::Workgroup, dest.get(), src.get(), numElements, 1, 0); - return device_event(e); + return device_event(&e); } template @@ -133,11 +133,11 @@ template class group { global_ptr src, size_t numElements, size_t srcStride) const { - cl::__spirv::OpTypeEvent *e = - cl::__spirv::OpGroupAsyncCopyGlobalToLocal( - cl::__spirv::Scope::Workgroup, + __ocl_event_t e = + OpGroupAsyncCopyGlobalToLocal( + Scope::Workgroup, dest.get(), src.get(), numElements, srcStride, 0); - return device_event(e); + return device_event(&e); } template @@ -145,11 +145,11 @@ template class group { local_ptr src, size_t numElements, size_t destStride) const { - cl::__spirv::OpTypeEvent *e = - cl::__spirv::OpGroupAsyncCopyLocalToGlobal( - cl::__spirv::Scope::Workgroup, + __ocl_event_t e = + OpGroupAsyncCopyLocalToGlobal( + Scope::Workgroup, dest.get(), src.get(), numElements, destStride, 0); - return device_event(e); + return device_event(&e); } template diff --git a/sycl/include/CL/sycl/handler2.hpp b/sycl/include/CL/sycl/handler2.hpp index 098c4d9ef2ddb..bb564e91b0f86 100644 --- a/sycl/include/CL/sycl/handler2.hpp +++ b/sycl/include/CL/sycl/handler2.hpp @@ -76,20 +76,20 @@ namespace detail { \ template struct InitSizesST##POSTFIX<1, DstT> { \ static void initSize(DstT &Dst) { \ - Dst[0] = cl::__spirv::get##POSTFIX<0>(); \ + Dst[0] = get##POSTFIX<0>(); \ } \ }; \ \ template struct InitSizesST##POSTFIX<2, DstT> { \ static void initSize(DstT &Dst) { \ - Dst[1] = cl::__spirv::get##POSTFIX<1>(); \ + Dst[1] = get##POSTFIX<1>(); \ InitSizesST##POSTFIX<1, DstT>::initSize(Dst); \ } \ }; \ \ template struct InitSizesST##POSTFIX<3, DstT> { \ static void initSize(DstT &Dst) { \ - Dst[2] = cl::__spirv::get##POSTFIX<2>(); \ + Dst[2] = get##POSTFIX<2>(); \ InitSizesST##POSTFIX<2, DstT>::initSize(Dst); \ } \ }; \ diff --git a/sycl/include/CL/sycl/intel/sub_group.hpp b/sycl/include/CL/sycl/intel/sub_group.hpp index e78e3d0af3bb0..802eeb546d379 100644 --- a/sycl/include/CL/sycl/intel/sub_group.hpp +++ b/sycl/include/CL/sycl/intel/sub_group.hpp @@ -27,90 +27,92 @@ template struct is_vec> : std::true_type {}; struct minimum { - template + template static typename std::enable_if< !std::is_floating_point::value && std::is_signed::value, T>::type calc(T x) { - return cl::__spirv::OpGroupSMin(cl::__spirv::Scope::Subgroup, O, x); + return __spirv_GroupSMin(::Scope::Subgroup, O, x); } - template + template static typename std::enable_if< !std::is_floating_point::value && std::is_unsigned::value, T>::type calc(T x) { - return cl::__spirv::OpGroupUMin(cl::__spirv::Scope::Subgroup, O, x); + return __spirv_GroupUMin(::Scope::Subgroup, O, x); } - template + template static typename std::enable_if::value, T>::type calc(T x) { - return cl::__spirv::OpGroupFMin(cl::__spirv::Scope::Subgroup, O, x); + return __spirv_GroupFMin(::Scope::Subgroup, O, x); } }; struct maximum { - template + template static typename std::enable_if< !std::is_floating_point::value && std::is_signed::value, T>::type calc(T x) { - return cl::__spirv::OpGroupSMax(cl::__spirv::Scope::Subgroup, O, x); + return __spirv_GroupSMax(::Scope::Subgroup, O, x); } - template + template static typename std::enable_if< !std::is_floating_point::value && std::is_unsigned::value, T>::type calc(T x) { - return cl::__spirv::OpGroupUMax(cl::__spirv::Scope::Subgroup, O, x); + return __spirv_GroupUMax(::Scope::Subgroup, O, x); } - template + template static typename std::enable_if::value, T>::type calc(T x) { - return cl::__spirv::OpGroupFMax(cl::__spirv::Scope::Subgroup, O, x); + return __spirv_GroupFMax(::Scope::Subgroup, O, x); } }; struct plus { - template + template static typename std::enable_if< !std::is_floating_point::value && std::is_integral::value, T>::type calc(T x) { - return cl::__spirv::OpGroupIAdd(cl::__spirv::Scope::Subgroup, O, x); + return __spirv_GroupIAdd(::Scope::Subgroup, O, x); } - template + template static typename std::enable_if::value, T>::type calc(T x) { - return cl::__spirv::OpGroupFAdd(cl::__spirv::Scope::Subgroup, O, x); + return __spirv_GroupFAdd(::Scope::Subgroup, O, x); } }; struct sub_group { /* --- common interface members --- */ id<1> get_local_id() const { - return cl::__spirv::VarSubgroupLocalInvocationId; + return __spirv_BuiltInSubgroupLocalInvocationId; } - range<1> get_local_range() const { return cl::__spirv::VarSubgroupSize; } + range<1> get_local_range() const { return __spirv_BuiltInSubgroupSize; } range<1> get_max_local_range() const { - return cl::__spirv::VarSubgroupMaxSize; + return __spirv_BuiltInSubgroupMaxSize; } - id<1> get_group_id() const { return cl::__spirv::VarSubgroupId; } + id<1> get_group_id() const { return __spirv_BuiltInSubgroupId; } - unsigned int get_group_range() const { return cl::__spirv::VarNumSubgroups; } + unsigned int get_group_range() const { + return __spirv_BuiltInNumSubgroups; + } unsigned int get_uniform_group_range() const { - return cl::__spirv::VarNumEnqueuedSubgroups; + return __spirv_BuiltInNumEnqueuedSubgroups; } /* --- vote / ballot functions --- */ bool any(bool predicate) { - return cl::__spirv::OpGroupAny(cl::__spirv::Scope::Subgroup, predicate); + return __spirv_GroupAny(::Scope::Subgroup, predicate); } bool all(bool predicate) { - return cl::__spirv::OpGroupAll(cl::__spirv::Scope::Subgroup, predicate); + return __spirv_GroupAll(::Scope::Subgroup, predicate); } /* --- collectives --- */ @@ -118,25 +120,25 @@ struct sub_group { template T broadcast(typename std::enable_if::value, T>::type x, id<1> local_id) { - return cl::__spirv::OpGroupBroadcast(cl::__spirv::Scope::Subgroup, x, + return __spirv_GroupBroadcast(::Scope::Subgroup, x, local_id.get(0)); } template T reduce(typename std::enable_if::value, T>::type x) { - return BinaryOperation::template calc(x); + return BinaryOperation::template calc(x); } template T exclusive_scan( typename std::enable_if::value, T>::type x) { - return BinaryOperation::template calc(x); + return BinaryOperation::template calc(x); } template T inclusive_scan( typename std::enable_if::value, T>::type x) { - return BinaryOperation::template calc(x); + return BinaryOperation::template calc(x); } template @@ -152,13 +154,13 @@ struct sub_group { template EnableIfIsArithmeticOrHalf shuffle(T x, id<1> local_id) { - return cl::__spirv::OpSubgroupShuffleINTEL(x, local_id.get(0)); + return __spirv_SubgroupShuffleINTEL(x, local_id.get(0)); } template typename std::enable_if::value, T>::type shuffle(T x, id<1> local_id) { - return cl::__spirv::OpSubgroupShuffleINTEL((typename T::vector_t)x, + return __spirv_SubgroupShuffleINTEL((typename T::vector_t)x, local_id.get(0)); } @@ -189,13 +191,13 @@ struct sub_group { template EnableIfIsArithmeticOrHalf shuffle_xor(T x, id<1> value) { - return cl::__spirv::OpSubgroupShuffleXorINTEL(x, (uint32_t)value.get(0)); + return __spirv_SubgroupShuffleXorINTEL(x, (uint32_t)value.get(0)); } template typename std::enable_if::value, T>::type shuffle_xor(T x, id<1> value) { - return cl::__spirv::OpSubgroupShuffleXorINTEL((typename T::vector_t)x, + return __spirv_SubgroupShuffleXorINTEL((typename T::vector_t)x, (uint32_t)value.get(0)); } @@ -204,14 +206,14 @@ struct sub_group { template EnableIfIsArithmeticOrHalf shuffle(T x, T y, id<1> local_id) { - return cl::__spirv::OpSubgroupShuffleDownINTEL( + return __spirv_SubgroupShuffleDownINTEL( x, y, local_id.get(0) - get_local_id().get(0)); } template typename std::enable_if::value, T>::type shuffle(T x, T y, id<1> local_id) { - return cl::__spirv::OpSubgroupShuffleDownINTEL( + return __spirv_SubgroupShuffleDownINTEL( (typename T::vector_t)x, (typename T::vector_t)y, local_id.get(0) - get_local_id().get(0)); } @@ -219,26 +221,26 @@ struct sub_group { template EnableIfIsArithmeticOrHalf shuffle_down(T current, T next, uint32_t delta) { - return cl::__spirv::OpSubgroupShuffleDownINTEL(current, next, delta); + return __spirv_SubgroupShuffleDownINTEL(current, next, delta); } template typename std::enable_if::value, T>::type shuffle_down(T current, T next, uint32_t delta) { - return cl::__spirv::OpSubgroupShuffleDownINTEL( + return __spirv_SubgroupShuffleDownINTEL( (typename T::vector_t)current, (typename T::vector_t)next, delta); } template EnableIfIsArithmeticOrHalf shuffle_up(T previous, T current, uint32_t delta) { - return cl::__spirv::OpSubgroupShuffleUpINTEL(previous, current, delta); + return __spirv_SubgroupShuffleUpINTEL(previous, current, delta); } template typename std::enable_if::value, T>::type shuffle_up(T previous, T current, uint32_t delta) { - return cl::__spirv::OpSubgroupShuffleUpINTEL( + return __spirv_SubgroupShuffleUpINTEL( (typename T::vector_t)previous, (typename T::vector_t)current, delta); } @@ -252,11 +254,11 @@ struct sub_group { T>::type load(const multi_ptr src) { if (sizeof(T) == sizeof(uint32_t)) { - uint32_t t = cl::__spirv::OpSubgroupBlockReadINTEL( + uint32_t t = __spirv_SubgroupBlockReadINTEL( (const __global uint32_t *)src.get()); return *((T *)(&t)); } - uint16_t t = cl::__spirv::OpSubgroupBlockReadINTEL( + uint16_t t = __spirv_SubgroupBlockReadINTEL( (const __global uint16_t *)src.get()); return *((T *)(&t)); } @@ -274,13 +276,13 @@ struct sub_group { if (sizeof(T) == sizeof(uint32_t)) { typedef uint32_t ocl_t __attribute__((ext_vector_type(N))); - ocl_t t = cl::__spirv::OpSubgroupBlockReadINTEL( + ocl_t t = __spirv_SubgroupBlockReadINTEL( (const __global uint32_t *)src.get()); return *((typename vec::vector_t *)(&t)); } typedef uint16_t ocl_t __attribute__((ext_vector_type(N))); - ocl_t t = cl::__spirv::OpSubgroupBlockReadINTEL( + ocl_t t = __spirv_SubgroupBlockReadINTEL( (const __global uint16_t *)src.get()); return *((typename vec::vector_t *)(&t)); } @@ -293,10 +295,10 @@ struct sub_group { Space == access::address_space::global_space, T>::type &x) { if (sizeof(T) == sizeof(uint32_t)) { - cl::__spirv::OpSubgroupBlockWriteINTEL( + __spirv_SubgroupBlockWriteINTEL( (__global uint32_t *)dst.get(), *((uint32_t *)&x)); } else { - cl::__spirv::OpSubgroupBlockWriteINTEL( + __spirv_SubgroupBlockWriteINTEL( (__global uint16_t *)dst.get(), *((uint16_t *)&x)); } } @@ -318,11 +320,11 @@ struct sub_group { N> &x) { if (sizeof(T) == sizeof(uint32_t)) { typedef uint32_t ocl_t __attribute__((ext_vector_type(N))); - cl::__spirv::OpSubgroupBlockWriteINTEL((__global uint32_t *)dst.get(), + __spirv_SubgroupBlockWriteINTEL((__global uint32_t *)dst.get(), *((ocl_t *)&x)); } else { typedef uint16_t ocl_t __attribute__((ext_vector_type(N))); - cl::__spirv::OpSubgroupBlockWriteINTEL((__global uint16_t *)dst.get(), + __spirv_SubgroupBlockWriteINTEL((__global uint16_t *)dst.get(), *((ocl_t *)&x)); } } @@ -330,22 +332,22 @@ struct sub_group { /* --- synchronization functions --- */ void barrier(access::fence_space accessSpace = access::fence_space::global_and_local) const { - uint32_t flags = cl::__spirv::MemorySemantics::SequentiallyConsistent; + uint32_t flags = ::MemorySemantics::SequentiallyConsistent; switch (accessSpace) { case access::fence_space::global_space: - flags |= cl::__spirv::MemorySemantics::CrossWorkgroupMemory; + flags |= ::MemorySemantics::CrossWorkgroupMemory; break; case access::fence_space::local_space: - flags |= cl::__spirv::MemorySemantics::SubgroupMemory; + flags |= ::MemorySemantics::SubgroupMemory; break; case access::fence_space::global_and_local: default: - flags |= cl::__spirv::MemorySemantics::CrossWorkgroupMemory | - cl::__spirv::MemorySemantics::SubgroupMemory; + flags |= ::MemorySemantics::CrossWorkgroupMemory | + ::MemorySemantics::SubgroupMemory; break; } - cl::__spirv::OpControlBarrier(cl::__spirv::Scope::Subgroup, - cl::__spirv::Scope::Workgroup, flags); + __spirv_ControlBarrier(::Scope::Subgroup, + ::Scope::Workgroup, flags); } protected: diff --git a/sycl/include/CL/sycl/multi_ptr.hpp b/sycl/include/CL/sycl/multi_ptr.hpp index 097b6bbb2aca3..1e68eb0f65936 100644 --- a/sycl/include/CL/sycl/multi_ptr.hpp +++ b/sycl/include/CL/sycl/multi_ptr.hpp @@ -259,7 +259,7 @@ template class multi_ptr { #else auto PrefetchPtr = reinterpret_cast(m_Pointer); #endif - cl::__spirv::prefetch(PrefetchPtr, NumBytes); + __spirv_ocl_prefetch(PrefetchPtr, NumBytes); } private: diff --git a/sycl/include/CL/sycl/nd_item.hpp b/sycl/include/CL/sycl/nd_item.hpp index 9e9fa9d1011cc..27cdf4d2bb92f 100644 --- a/sycl/include/CL/sycl/nd_item.hpp +++ b/sycl/include/CL/sycl/nd_item.hpp @@ -81,22 +81,22 @@ template struct nd_item { void barrier(access::fence_space accessSpace = access::fence_space::global_and_local) const { - uint32_t flags = ::cl::__spirv::MemorySemantics::SequentiallyConsistent; + uint32_t flags = MemorySemantics::SequentiallyConsistent; switch (accessSpace) { case access::fence_space::global_space: - flags |= cl::__spirv::MemorySemantics::CrossWorkgroupMemory; + flags |= MemorySemantics::CrossWorkgroupMemory; break; case access::fence_space::local_space: - flags |= cl::__spirv::MemorySemantics::WorkgroupMemory; + flags |= MemorySemantics::WorkgroupMemory; break; case access::fence_space::global_and_local: default: - flags |= cl::__spirv::MemorySemantics::CrossWorkgroupMemory | - cl::__spirv::MemorySemantics::WorkgroupMemory; + flags |= MemorySemantics::CrossWorkgroupMemory | + MemorySemantics::WorkgroupMemory; break; } - cl::__spirv::OpControlBarrier(::cl::__spirv::Scope::Workgroup, - ::cl::__spirv::Scope::Workgroup, flags); + __spirv_ControlBarrier(Scope::Workgroup, + Scope::Workgroup, flags); } /// Executes a work-group mem-fence with memory ordering on the local address diff --git a/sycl/include/CL/sycl/sampler.hpp b/sycl/include/CL/sycl/sampler.hpp index 05507f8087443..fd03dec7383a9 100644 --- a/sycl/include/CL/sycl/sampler.hpp +++ b/sycl/include/CL/sycl/sampler.hpp @@ -60,7 +60,7 @@ class sampler { private: #ifdef __SYCL_DEVICE_ONLY__ detail::sampler_impl impl; - void __init(__spirv::OpTypeSampler *Sampler) { impl.m_Sampler = Sampler; } + void __init(__ocl_sampler_t Sampler) { impl.m_Sampler = Sampler; } char padding[sizeof(std::shared_ptr) - sizeof(impl)]; #else std::shared_ptr impl; diff --git a/sycl/source/detail/builtins.cpp b/sycl/source/detail/builtins.cpp index 90903e5da402e..cedda21b5b045 100644 --- a/sycl/source/detail/builtins.cpp +++ b/sycl/source/detail/builtins.cpp @@ -376,19 +376,19 @@ template <> struct helper<0> { }; } // namespace detail -s::cl_float OpDot(s::cl_float2, s::cl_float2); -s::cl_float OpDot(s::cl_float3, s::cl_float3); -s::cl_float OpDot(s::cl_float4, s::cl_float4); -s::cl_double OpDot(s::cl_double2, s::cl_double2); -s::cl_double OpDot(s::cl_double3, s::cl_double3); -s::cl_double OpDot(s::cl_double4, s::cl_double4); -s::cl_half OpDot(s::cl_half2, s::cl_half2); -s::cl_half OpDot(s::cl_half3, s::cl_half3); -s::cl_half OpDot(s::cl_half4, s::cl_half4); - -s::cl_int OpAll(s::cl_int2); -s::cl_int OpAll(s::cl_int3); -s::cl_int OpAll(s::cl_int4); +s::cl_float Dot(s::cl_float2, s::cl_float2); +s::cl_float Dot(s::cl_float3, s::cl_float3); +s::cl_float Dot(s::cl_float4, s::cl_float4); +s::cl_double Dot(s::cl_double2, s::cl_double2); +s::cl_double Dot(s::cl_double3, s::cl_double3); +s::cl_double Dot(s::cl_double4, s::cl_double4); +s::cl_half Dot(s::cl_half2, s::cl_half2); +s::cl_half Dot(s::cl_half3, s::cl_half3); +s::cl_half Dot(s::cl_half4, s::cl_half4); + +s::cl_int All(s::cl_int2); +s::cl_int All(s::cl_int3); +s::cl_int All(s::cl_int4); namespace { template inline T __acospi(T x) { return std::acos(x) / M_PI; } @@ -650,26 +650,26 @@ template inline T __cross(T p0, T p1) { return result; } -template inline void __OpFMul_impl(T &r, T p0, T p1) { +template inline void __FMul_impl(T &r, T p0, T p1) { r += p0 * p1; } -template inline T __OpFMul(T p0, T p1) { +template inline T __FMul(T p0, T p1) { T result = 0; - __OpFMul_impl(result, p0, p1); + __FMul_impl(result, p0, p1); return result; } template inline typename std::enable_if::value, T>::type __length(T t) { - return std::sqrt(__OpFMul(t, t)); + return std::sqrt(__FMul(t, t)); } template inline typename std::enable_if::value, typename T::element_type>::type __length(T t) { - return std::sqrt(OpDot(t, t)); + return std::sqrt(Dot(t, t)); } template @@ -689,81 +689,81 @@ __normalize(T t) { template inline typename std::enable_if::value, T>::type __fast_length(T t) { - return std::sqrt(__OpFMul(t, t)); + return std::sqrt(__FMul(t, t)); } template inline typename std::enable_if::value, typename T::element_type>::type __fast_length(T t) { - return std::sqrt(OpDot(t, t)); + return std::sqrt(Dot(t, t)); } template inline typename std::enable_if::value, T>::type __fast_normalize(T t) { - if (OpAll(t == T(0.0f))) + if (All(t == T(0.0f))) return t; - typename T::element_type r = std::sqrt(OpDot(t, t)); + typename T::element_type r = std::sqrt(Dot(t, t)); return t / T(r); } -template inline T __vOpFOrdEqual(T x, T y) { return -(x == y); } +template inline T __vFOrdEqual(T x, T y) { return -(x == y); } -template inline T __sOpFOrdEqual(T x, T y) { return x == y; } +template inline T __sFOrdEqual(T x, T y) { return x == y; } -template inline T __vOpFUnordNotEqual(T x, T y) { +template inline T __vFUnordNotEqual(T x, T y) { return -(x != y); } -template inline T __sOpFUnordNotEqual(T x, T y) { return x != y; } +template inline T __sFUnordNotEqual(T x, T y) { return x != y; } -template inline T __vOpFOrdGreaterThan(T x, T y) { +template inline T __vFOrdGreaterThan(T x, T y) { return -(x > y); } -template inline T __sOpFOrdGreaterThan(T x, T y) { return x > y; } +template inline T __sFOrdGreaterThan(T x, T y) { return x > y; } -template inline T __vOpFOrdGreaterThanEqual(T x, T y) { +template inline T __vFOrdGreaterThanEqual(T x, T y) { return -(x >= y); } -template inline T __sOpFOrdGreaterThanEqual(T x, T y) { +template inline T __sFOrdGreaterThanEqual(T x, T y) { return x >= y; } -template inline T __vOpFOrdLessThanEqual(T x, T y) { +template inline T __vFOrdLessThanEqual(T x, T y) { return -(x <= y); } -template inline T __sOpFOrdLessThanEqual(T x, T y) { +template inline T __sFOrdLessThanEqual(T x, T y) { return x <= y; } -template inline T __vOpLessOrGreater(T x, T y) { +template inline T __vLessOrGreater(T x, T y) { return -((x < y) || (x > y)); } -template inline T __sOpLessOrGreater(T x, T y) { +template inline T __sLessOrGreater(T x, T y) { return ((x < y) || (x > y)); } -template cl_int inline __OpAny(T x) { return d::msbIsSet(x); } -template cl_int inline __OpAll(T x) { return d::msbIsSet(x); } +template cl_int inline __Any(T x) { return d::msbIsSet(x); } +template cl_int inline __All(T x) { return d::msbIsSet(x); } -template inline T __vOpOrdered(T x, T y) { +template inline T __vOrdered(T x, T y) { return -(!(std::isunordered(x, y))); } -template inline T __sOpOrdered(T x, T y) { +template inline T __sOrdered(T x, T y) { return !(std::isunordered(x, y)); } -template inline T __vOpUnordered(T x, T y) { +template inline T __vUnordered(T x, T y) { return -(std::isunordered(x, y)); } -template inline T __sOpUnordered(T x, T y) { +template inline T __sUnordered(T x, T y) { return std::isunordered(x, y); } @@ -813,11 +813,11 @@ typename std::enable_if::value, T>::type inline __bitselect( return br.f; } -template inline T2 __OpSelect(T c, T2 b, T2 a) { +template inline T2 __Select(T c, T2 b, T2 a) { return (c ? b : a); } -template inline T2 __vOpSelect(T c, T2 b, T2 a) { +template inline T2 __vSelect(T c, T2 b, T2 a) { return d::msbIsSet(c) ? b : a; } } // namespace @@ -1991,7 +1991,7 @@ MAKE_1V_2V(u_upsample, s::cl_uint, s::cl_ushort, s::cl_ushort) MAKE_1V_2V(u_upsample, s::cl_ulong, s::cl_uint, s::cl_uint) // TODO delete when Intel CPU OpenCL runtime will be fixed -// OpExtInst ... s_upsample -> _Z8upsampleij (now _Z8upsampleii) +// ExtInst ... s_upsample -> _Z8upsampleij (now _Z8upsampleii) #define s_upsample u_upsample cl_short s_upsample(s::cl_char x, s::cl_uchar y) __NOEXC { @@ -2184,16 +2184,16 @@ s::cl_half4 cross(s::cl_half4 p0, s::cl_half4 p1) __NOEXC { return __cross(p0, p1); } -// OpFMul -cl_float OpFMul(s::cl_float p0, s::cl_float p1) { return __OpFMul(p0, p1); } -cl_double OpFMul(s::cl_double p0, s::cl_double p1) { return __OpFMul(p0, p1); } -cl_float OpFMul(s::cl_half p0, s::cl_half p1) { return __OpFMul(p0, p1); } +// FMul +cl_float FMul(s::cl_float p0, s::cl_float p1) { return __FMul(p0, p1); } +cl_double FMul(s::cl_double p0, s::cl_double p1) { return __FMul(p0, p1); } +cl_float FMul(s::cl_half p0, s::cl_half p1) { return __FMul(p0, p1); } -// OpDot -MAKE_GEO_1V_2V_RS(OpDot, __OpFMul_impl, s::cl_float, s::cl_float, s::cl_float) -MAKE_GEO_1V_2V_RS(OpDot, __OpFMul_impl, s::cl_double, s::cl_double, +// Dot +MAKE_GEO_1V_2V_RS(Dot, __FMul_impl, s::cl_float, s::cl_float, s::cl_float) +MAKE_GEO_1V_2V_RS(Dot, __FMul_impl, s::cl_double, s::cl_double, s::cl_double) -MAKE_GEO_1V_2V_RS(OpDot, __OpFMul_impl, s::cl_half, s::cl_half, s::cl_half) +MAKE_GEO_1V_2V_RS(Dot, __FMul_impl, s::cl_half, s::cl_half, s::cl_half) // length cl_float length(s::cl_float p) { return __length(p); } @@ -2253,7 +2253,7 @@ cl_float fast_length(s::cl_float4 p) { return __fast_length(p); } s::cl_float fast_normalize(s::cl_float p) { if (p == 0.0f) return p; - s::cl_float r = std::sqrt(OpFMul(p, p)); + s::cl_float r = std::sqrt(FMul(p, p)); return p / r; } s::cl_float2 fast_normalize(s::cl_float2 p) { return __fast_normalize(p); } @@ -2275,230 +2275,230 @@ cl_float fast_distance(s::cl_float4 p0, s::cl_float4 p1) { } /* --------------- 4.13.7 Relational functions. Host version --------------*/ -// OpFOrdEqual-isequal -cl_int OpFOrdEqual(s::cl_float x, s::cl_float y) __NOEXC { - return __sOpFOrdEqual(x, y); +// FOrdEqual-isequal +cl_int FOrdEqual(s::cl_float x, s::cl_float y) __NOEXC { + return __sFOrdEqual(x, y); } -cl_int OpFOrdEqual(s::cl_double x, s::cl_double y) __NOEXC { - return __sOpFOrdEqual(x, y); +cl_int FOrdEqual(s::cl_double x, s::cl_double y) __NOEXC { + return __sFOrdEqual(x, y); } -cl_int OpFOrdEqual(s::cl_half x, s::cl_half y) __NOEXC { - return __sOpFOrdEqual(x, y); +cl_int FOrdEqual(s::cl_half x, s::cl_half y) __NOEXC { + return __sFOrdEqual(x, y); } -MAKE_1V_2V_FUNC(OpFOrdEqual, __vOpFOrdEqual, s::cl_int, s::cl_float, +MAKE_1V_2V_FUNC(FOrdEqual, __vFOrdEqual, s::cl_int, s::cl_float, s::cl_float) -MAKE_1V_2V_FUNC(OpFOrdEqual, __vOpFOrdEqual, s::cl_long, s::cl_double, +MAKE_1V_2V_FUNC(FOrdEqual, __vFOrdEqual, s::cl_long, s::cl_double, s::cl_double) -MAKE_1V_2V_FUNC(OpFOrdEqual, __vOpFOrdEqual, s::cl_short, s::cl_half, +MAKE_1V_2V_FUNC(FOrdEqual, __vFOrdEqual, s::cl_short, s::cl_half, s::cl_half) -// OpFUnordNotEqual-isnotequal -cl_int OpFUnordNotEqual(s::cl_float x, s::cl_float y) __NOEXC { - return __sOpFUnordNotEqual(x, y); +// FUnordNotEqual-isnotequal +cl_int FUnordNotEqual(s::cl_float x, s::cl_float y) __NOEXC { + return __sFUnordNotEqual(x, y); } -cl_int OpFUnordNotEqual(s::cl_double x, s::cl_double y) __NOEXC { - return __sOpFUnordNotEqual(x, y); +cl_int FUnordNotEqual(s::cl_double x, s::cl_double y) __NOEXC { + return __sFUnordNotEqual(x, y); } -cl_int OpFUnordNotEqual(s::cl_half x, s::cl_half y) __NOEXC { - return __sOpFUnordNotEqual(x, y); +cl_int FUnordNotEqual(s::cl_half x, s::cl_half y) __NOEXC { + return __sFUnordNotEqual(x, y); } -MAKE_1V_2V_FUNC(OpFUnordNotEqual, __vOpFUnordNotEqual, s::cl_int, s::cl_float, +MAKE_1V_2V_FUNC(FUnordNotEqual, __vFUnordNotEqual, s::cl_int, s::cl_float, s::cl_float) -MAKE_1V_2V_FUNC(OpFUnordNotEqual, __vOpFUnordNotEqual, s::cl_long, s::cl_double, +MAKE_1V_2V_FUNC(FUnordNotEqual, __vFUnordNotEqual, s::cl_long, s::cl_double, s::cl_double) -MAKE_1V_2V_FUNC(OpFUnordNotEqual, __vOpFUnordNotEqual, s::cl_short, s::cl_half, +MAKE_1V_2V_FUNC(FUnordNotEqual, __vFUnordNotEqual, s::cl_short, s::cl_half, s::cl_half) -// (OpFOrdGreaterThan) // isgreater -cl_int OpFOrdGreaterThan(s::cl_float x, s::cl_float y) __NOEXC { - return __sOpFOrdGreaterThan(x, y); +// (FOrdGreaterThan) // isgreater +cl_int FOrdGreaterThan(s::cl_float x, s::cl_float y) __NOEXC { + return __sFOrdGreaterThan(x, y); } -cl_int OpFOrdGreaterThan(s::cl_double x, s::cl_double y) __NOEXC { - return __sOpFOrdGreaterThan(x, y); +cl_int FOrdGreaterThan(s::cl_double x, s::cl_double y) __NOEXC { + return __sFOrdGreaterThan(x, y); } -cl_int OpFOrdGreaterThan(s::cl_half x, s::cl_half y) __NOEXC { - return __sOpFOrdGreaterThan(x, y); +cl_int FOrdGreaterThan(s::cl_half x, s::cl_half y) __NOEXC { + return __sFOrdGreaterThan(x, y); } -MAKE_1V_2V_FUNC(OpFOrdGreaterThan, __vOpFOrdGreaterThan, s::cl_int, s::cl_float, +MAKE_1V_2V_FUNC(FOrdGreaterThan, __vFOrdGreaterThan, s::cl_int, s::cl_float, s::cl_float) -MAKE_1V_2V_FUNC(OpFOrdGreaterThan, __vOpFOrdGreaterThan, s::cl_long, +MAKE_1V_2V_FUNC(FOrdGreaterThan, __vFOrdGreaterThan, s::cl_long, s::cl_double, s::cl_double) -MAKE_1V_2V_FUNC(OpFOrdGreaterThan, __vOpFOrdGreaterThan, s::cl_short, +MAKE_1V_2V_FUNC(FOrdGreaterThan, __vFOrdGreaterThan, s::cl_short, s::cl_half, s::cl_half) -// (OpFOrdGreaterThanEqual) // isgreaterequal -cl_int OpFOrdGreaterThanEqual(s::cl_float x, s::cl_float y) __NOEXC { - return __sOpFOrdGreaterThanEqual(x, y); +// (FOrdGreaterThanEqual) // isgreaterequal +cl_int FOrdGreaterThanEqual(s::cl_float x, s::cl_float y) __NOEXC { + return __sFOrdGreaterThanEqual(x, y); } -cl_int OpFOrdGreaterThanEqual(s::cl_double x, s::cl_double y) __NOEXC { - return __sOpFOrdGreaterThanEqual(x, y); +cl_int FOrdGreaterThanEqual(s::cl_double x, s::cl_double y) __NOEXC { + return __sFOrdGreaterThanEqual(x, y); } -cl_int OpFOrdGreaterThanEqual(s::cl_half x, s::cl_half y) __NOEXC { - return __sOpFOrdGreaterThanEqual(x, y); +cl_int FOrdGreaterThanEqual(s::cl_half x, s::cl_half y) __NOEXC { + return __sFOrdGreaterThanEqual(x, y); } -MAKE_1V_2V_FUNC(OpFOrdGreaterThanEqual, __vOpFOrdGreaterThanEqual, s::cl_int, +MAKE_1V_2V_FUNC(FOrdGreaterThanEqual, __vFOrdGreaterThanEqual, s::cl_int, s::cl_float, s::cl_float) -MAKE_1V_2V_FUNC(OpFOrdGreaterThanEqual, __vOpFOrdGreaterThanEqual, s::cl_long, +MAKE_1V_2V_FUNC(FOrdGreaterThanEqual, __vFOrdGreaterThanEqual, s::cl_long, s::cl_double, s::cl_double) -MAKE_1V_2V_FUNC(OpFOrdGreaterThanEqual, __vOpFOrdGreaterThanEqual, s::cl_short, +MAKE_1V_2V_FUNC(FOrdGreaterThanEqual, __vFOrdGreaterThanEqual, s::cl_short, s::cl_half, s::cl_half) -// (OpFOrdLessThan) // isless -cl_int OpFOrdLessThan(s::cl_float x, s::cl_float y) __NOEXC { return (x < y); } -cl_int OpFOrdLessThan(s::cl_double x, s::cl_double y) __NOEXC { +// (FOrdLessThan) // isless +cl_int FOrdLessThan(s::cl_float x, s::cl_float y) __NOEXC { return (x < y); } +cl_int FOrdLessThan(s::cl_double x, s::cl_double y) __NOEXC { return (x < y); } -cl_int __vOpFOrdLessThan(s::cl_float x, s::cl_float y) __NOEXC { +cl_int __vFOrdLessThan(s::cl_float x, s::cl_float y) __NOEXC { return -(x < y); } -cl_long __vOpFOrdLessThan(s::cl_double x, s::cl_double y) __NOEXC { +cl_long __vFOrdLessThan(s::cl_double x, s::cl_double y) __NOEXC { return -(x < y); } -cl_int OpFOrdLessThan(s::cl_half x, s::cl_half y) __NOEXC { return (x < y); } -cl_short __vOpFOrdLessThan(s::cl_half x, s::cl_half y) __NOEXC { +cl_int FOrdLessThan(s::cl_half x, s::cl_half y) __NOEXC { return (x < y); } +cl_short __vFOrdLessThan(s::cl_half x, s::cl_half y) __NOEXC { return -(x < y); } -MAKE_1V_2V_FUNC(OpFOrdLessThan, __vOpFOrdLessThan, s::cl_int, s::cl_float, +MAKE_1V_2V_FUNC(FOrdLessThan, __vFOrdLessThan, s::cl_int, s::cl_float, s::cl_float) -MAKE_1V_2V_FUNC(OpFOrdLessThan, __vOpFOrdLessThan, s::cl_long, s::cl_double, +MAKE_1V_2V_FUNC(FOrdLessThan, __vFOrdLessThan, s::cl_long, s::cl_double, s::cl_double) -MAKE_1V_2V_FUNC(OpFOrdLessThan, __vOpFOrdLessThan, s::cl_short, s::cl_half, +MAKE_1V_2V_FUNC(FOrdLessThan, __vFOrdLessThan, s::cl_short, s::cl_half, s::cl_half) -// (OpFOrdLessThanEqual) // islessequal -cl_int OpFOrdLessThanEqual(s::cl_float x, s::cl_float y) __NOEXC { - return __sOpFOrdLessThanEqual(x, y); +// (FOrdLessThanEqual) // islessequal +cl_int FOrdLessThanEqual(s::cl_float x, s::cl_float y) __NOEXC { + return __sFOrdLessThanEqual(x, y); } -cl_int OpFOrdLessThanEqual(s::cl_double x, s::cl_double y) __NOEXC { - return __sOpFOrdLessThanEqual(x, y); +cl_int FOrdLessThanEqual(s::cl_double x, s::cl_double y) __NOEXC { + return __sFOrdLessThanEqual(x, y); } -cl_int OpFOrdLessThanEqual(s::cl_half x, s::cl_half y) __NOEXC { - return __sOpFOrdLessThanEqual(x, y); +cl_int FOrdLessThanEqual(s::cl_half x, s::cl_half y) __NOEXC { + return __sFOrdLessThanEqual(x, y); } -MAKE_1V_2V_FUNC(OpFOrdLessThanEqual, __vOpFOrdLessThanEqual, s::cl_int, +MAKE_1V_2V_FUNC(FOrdLessThanEqual, __vFOrdLessThanEqual, s::cl_int, s::cl_float, s::cl_float) -MAKE_1V_2V_FUNC(OpFOrdLessThanEqual, __vOpFOrdLessThanEqual, s::cl_long, +MAKE_1V_2V_FUNC(FOrdLessThanEqual, __vFOrdLessThanEqual, s::cl_long, s::cl_double, s::cl_double) -MAKE_1V_2V_FUNC(OpFOrdLessThanEqual, __vOpFOrdLessThanEqual, s::cl_short, +MAKE_1V_2V_FUNC(FOrdLessThanEqual, __vFOrdLessThanEqual, s::cl_short, s::cl_half, s::cl_half) -// (OpLessOrGreater) // islessgreater -cl_int OpLessOrGreater(s::cl_float x, s::cl_float y) __NOEXC { - return __sOpLessOrGreater(x, y); +// (LessOrGreater) // islessgreater +cl_int LessOrGreater(s::cl_float x, s::cl_float y) __NOEXC { + return __sLessOrGreater(x, y); } -cl_int OpLessOrGreater(s::cl_double x, s::cl_double y) __NOEXC { - return __sOpLessOrGreater(x, y); +cl_int LessOrGreater(s::cl_double x, s::cl_double y) __NOEXC { + return __sLessOrGreater(x, y); } -cl_int OpLessOrGreater(s::cl_half x, s::cl_half y) __NOEXC { - return __sOpLessOrGreater(x, y); +cl_int LessOrGreater(s::cl_half x, s::cl_half y) __NOEXC { + return __sLessOrGreater(x, y); } -MAKE_1V_2V_FUNC(OpLessOrGreater, __vOpLessOrGreater, s::cl_int, s::cl_float, +MAKE_1V_2V_FUNC(LessOrGreater, __vLessOrGreater, s::cl_int, s::cl_float, s::cl_float) -MAKE_1V_2V_FUNC(OpLessOrGreater, __vOpLessOrGreater, s::cl_long, s::cl_double, +MAKE_1V_2V_FUNC(LessOrGreater, __vLessOrGreater, s::cl_long, s::cl_double, s::cl_double) -MAKE_1V_2V_FUNC(OpLessOrGreater, __vOpLessOrGreater, s::cl_short, s::cl_half, +MAKE_1V_2V_FUNC(LessOrGreater, __vLessOrGreater, s::cl_short, s::cl_half, s::cl_half) -// (OpIsFinite) // isfinite -cl_int OpIsFinite(s::cl_float x) __NOEXC { return std::isfinite(x); } -cl_int OpIsFinite(s::cl_double x) __NOEXC { return std::isfinite(x); } -cl_int __vOpIsFinite(s::cl_float x) __NOEXC { return -(std::isfinite(x)); } -cl_long __vOpIsFinite(s::cl_double x) __NOEXC { return -(std::isfinite(x)); } -cl_int OpIsFinite(s::cl_half x) __NOEXC { return std::isfinite(x); } -cl_short __vOpIsFinite(s::cl_half x) __NOEXC { return -(std::isfinite(x)); } -MAKE_1V_FUNC(OpIsFinite, __vOpIsFinite, s::cl_int, s::cl_float) -MAKE_1V_FUNC(OpIsFinite, __vOpIsFinite, s::cl_long, s::cl_double) -MAKE_1V_FUNC(OpIsFinite, __vOpIsFinite, s::cl_short, s::cl_half) - -// (OpIsInf) // isinf -cl_int OpIsInf(s::cl_float x) __NOEXC { return std::isinf(x); } -cl_int OpIsInf(s::cl_double x) __NOEXC { return std::isinf(x); } -cl_int __vOpIsInf(s::cl_float x) __NOEXC { return -(std::isinf(x)); } -cl_long __vOpIsInf(s::cl_double x) __NOEXC { return -(std::isinf(x)); } -cl_int OpIsInf(s::cl_half x) __NOEXC { return std::isinf(x); } -cl_short __vOpIsInf(s::cl_half x) __NOEXC { return -(std::isinf(x)); } -MAKE_1V_FUNC(OpIsInf, __vOpIsInf, s::cl_int, s::cl_float) -MAKE_1V_FUNC(OpIsInf, __vOpIsInf, s::cl_long, s::cl_double) -MAKE_1V_FUNC(OpIsInf, __vOpIsInf, s::cl_short, s::cl_half) - -// (OpIsNan) // isnan -cl_int OpIsNan(s::cl_float x) __NOEXC { return std::isnan(x); } -cl_int OpIsNan(s::cl_double x) __NOEXC { return std::isnan(x); } -cl_int __vOpIsNan(s::cl_float x) __NOEXC { return -(std::isnan(x)); } -cl_long __vOpIsNan(s::cl_double x) __NOEXC { return -(std::isnan(x)); } - -cl_int OpIsNan(s::cl_half x) __NOEXC { return std::isnan(x); } -cl_short __vOpIsNan(s::cl_half x) __NOEXC { return -(std::isnan(x)); } -MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_int, s::cl_float) -MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_long, s::cl_double) -MAKE_1V_FUNC(OpIsNan, __vOpIsNan, s::cl_short, s::cl_half) - -// (OpIsNormal) // isnormal -cl_int OpIsNormal(s::cl_float x) __NOEXC { return std::isnormal(x); } -cl_int OpIsNormal(s::cl_double x) __NOEXC { return std::isnormal(x); } -cl_int __vOpIsNormal(s::cl_float x) __NOEXC { return -(std::isnormal(x)); } -cl_long __vOpIsNormal(s::cl_double x) __NOEXC { return -(std::isnormal(x)); } -cl_int OpIsNormal(s::cl_half x) __NOEXC { return std::isnormal(x); } -cl_short __vOpIsNormal(s::cl_half x) __NOEXC { return -(std::isnormal(x)); } -MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_int, s::cl_float) -MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_long, s::cl_double) -MAKE_1V_FUNC(OpIsNormal, __vOpIsNormal, s::cl_short, s::cl_half) - -// (OpOrdered) // isordered -cl_int OpOrdered(s::cl_float x, s::cl_float y) __NOEXC { - return __vOpOrdered(x, y); -} -cl_int OpOrdered(s::cl_double x, s::cl_double y) __NOEXC { - return __vOpOrdered(x, y); -} -cl_int OpOrdered(s::cl_half x, s::cl_half y) __NOEXC { - return __vOpOrdered(x, y); -} -MAKE_1V_2V_FUNC(OpOrdered, __vOpOrdered, s::cl_int, s::cl_float, s::cl_float) -MAKE_1V_2V_FUNC(OpOrdered, __vOpOrdered, s::cl_long, s::cl_double, s::cl_double) -MAKE_1V_2V_FUNC(OpOrdered, __vOpOrdered, s::cl_short, s::cl_half, s::cl_half) - -// (OpUnordered) // isunordered -cl_int OpUnordered(s::cl_float x, s::cl_float y) __NOEXC { - return __sOpUnordered(x, y); -} -cl_int OpUnordered(s::cl_double x, s::cl_double y) __NOEXC { - return __sOpUnordered(x, y); -} -cl_int OpUnordered(s::cl_half x, s::cl_half y) __NOEXC { - return __sOpUnordered(x, y); -} -MAKE_1V_2V_FUNC(OpUnordered, __vOpUnordered, s::cl_int, s::cl_float, +// (IsFinite) // isfinite +cl_int IsFinite(s::cl_float x) __NOEXC { return std::isfinite(x); } +cl_int IsFinite(s::cl_double x) __NOEXC { return std::isfinite(x); } +cl_int __vIsFinite(s::cl_float x) __NOEXC { return -(std::isfinite(x)); } +cl_long __vIsFinite(s::cl_double x) __NOEXC { return -(std::isfinite(x)); } +cl_int IsFinite(s::cl_half x) __NOEXC { return std::isfinite(x); } +cl_short __vIsFinite(s::cl_half x) __NOEXC { return -(std::isfinite(x)); } +MAKE_1V_FUNC(IsFinite, __vIsFinite, s::cl_int, s::cl_float) +MAKE_1V_FUNC(IsFinite, __vIsFinite, s::cl_long, s::cl_double) +MAKE_1V_FUNC(IsFinite, __vIsFinite, s::cl_short, s::cl_half) + +// (IsInf) // isinf +cl_int IsInf(s::cl_float x) __NOEXC { return std::isinf(x); } +cl_int IsInf(s::cl_double x) __NOEXC { return std::isinf(x); } +cl_int __vIsInf(s::cl_float x) __NOEXC { return -(std::isinf(x)); } +cl_long __vIsInf(s::cl_double x) __NOEXC { return -(std::isinf(x)); } +cl_int IsInf(s::cl_half x) __NOEXC { return std::isinf(x); } +cl_short __vIsInf(s::cl_half x) __NOEXC { return -(std::isinf(x)); } +MAKE_1V_FUNC(IsInf, __vIsInf, s::cl_int, s::cl_float) +MAKE_1V_FUNC(IsInf, __vIsInf, s::cl_long, s::cl_double) +MAKE_1V_FUNC(IsInf, __vIsInf, s::cl_short, s::cl_half) + +// (IsNan) // isnan +cl_int IsNan(s::cl_float x) __NOEXC { return std::isnan(x); } +cl_int IsNan(s::cl_double x) __NOEXC { return std::isnan(x); } +cl_int __vIsNan(s::cl_float x) __NOEXC { return -(std::isnan(x)); } +cl_long __vIsNan(s::cl_double x) __NOEXC { return -(std::isnan(x)); } + +cl_int IsNan(s::cl_half x) __NOEXC { return std::isnan(x); } +cl_short __vIsNan(s::cl_half x) __NOEXC { return -(std::isnan(x)); } +MAKE_1V_FUNC(IsNan, __vIsNan, s::cl_int, s::cl_float) +MAKE_1V_FUNC(IsNan, __vIsNan, s::cl_long, s::cl_double) +MAKE_1V_FUNC(IsNan, __vIsNan, s::cl_short, s::cl_half) + +// (IsNormal) // isnormal +cl_int IsNormal(s::cl_float x) __NOEXC { return std::isnormal(x); } +cl_int IsNormal(s::cl_double x) __NOEXC { return std::isnormal(x); } +cl_int __vIsNormal(s::cl_float x) __NOEXC { return -(std::isnormal(x)); } +cl_long __vIsNormal(s::cl_double x) __NOEXC { return -(std::isnormal(x)); } +cl_int IsNormal(s::cl_half x) __NOEXC { return std::isnormal(x); } +cl_short __vIsNormal(s::cl_half x) __NOEXC { return -(std::isnormal(x)); } +MAKE_1V_FUNC(IsNormal, __vIsNormal, s::cl_int, s::cl_float) +MAKE_1V_FUNC(IsNormal, __vIsNormal, s::cl_long, s::cl_double) +MAKE_1V_FUNC(IsNormal, __vIsNormal, s::cl_short, s::cl_half) + +// (Ordered) // isordered +cl_int Ordered(s::cl_float x, s::cl_float y) __NOEXC { + return __vOrdered(x, y); +} +cl_int Ordered(s::cl_double x, s::cl_double y) __NOEXC { + return __vOrdered(x, y); +} +cl_int Ordered(s::cl_half x, s::cl_half y) __NOEXC { + return __vOrdered(x, y); +} +MAKE_1V_2V_FUNC(Ordered, __vOrdered, s::cl_int, s::cl_float, s::cl_float) +MAKE_1V_2V_FUNC(Ordered, __vOrdered, s::cl_long, s::cl_double, s::cl_double) +MAKE_1V_2V_FUNC(Ordered, __vOrdered, s::cl_short, s::cl_half, s::cl_half) + +// (Unordered) // isunordered +cl_int Unordered(s::cl_float x, s::cl_float y) __NOEXC { + return __sUnordered(x, y); +} +cl_int Unordered(s::cl_double x, s::cl_double y) __NOEXC { + return __sUnordered(x, y); +} +cl_int Unordered(s::cl_half x, s::cl_half y) __NOEXC { + return __sUnordered(x, y); +} +MAKE_1V_2V_FUNC(Unordered, __vUnordered, s::cl_int, s::cl_float, s::cl_float) -MAKE_1V_2V_FUNC(OpUnordered, __vOpUnordered, s::cl_long, s::cl_double, +MAKE_1V_2V_FUNC(Unordered, __vUnordered, s::cl_long, s::cl_double, s::cl_double) -MAKE_1V_2V_FUNC(OpUnordered, __vOpUnordered, s::cl_short, s::cl_half, +MAKE_1V_2V_FUNC(Unordered, __vUnordered, s::cl_short, s::cl_half, s::cl_half) -// (OpSignBitSet) // signbit -cl_int OpSignBitSet(s::cl_float x) __NOEXC { return std::signbit(x); } -cl_int OpSignBitSet(s::cl_double x) __NOEXC { return std::signbit(x); } -cl_int __vOpSignBitSet(s::cl_float x) __NOEXC { return -(std::signbit(x)); } -cl_long __vOpSignBitSet(s::cl_double x) __NOEXC { return -(std::signbit(x)); } -cl_int OpSignBitSet(s::cl_half x) __NOEXC { return std::signbit(x); } -cl_short __vOpSignBitSet(s::cl_half x) __NOEXC { return -(std::signbit(x)); } -MAKE_1V_FUNC(OpSignBitSet, __vOpSignBitSet, s::cl_int, s::cl_float) -MAKE_1V_FUNC(OpSignBitSet, __vOpSignBitSet, s::cl_long, s::cl_double) -MAKE_1V_FUNC(OpSignBitSet, __vOpSignBitSet, s::cl_short, s::cl_half) - -// (OpAny) // any -MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_char) -MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_short) -MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_int) -MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::cl_long) -MAKE_SR_1V_OR(OpAny, __OpAny, s::cl_int, s::longlong) - -// (OpAll) // all -MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_char) -MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_short) -MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_int) -MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::cl_long) -MAKE_SR_1V_AND(OpAll, __OpAll, s::cl_int, s::longlong) +// (SignBitSet) // signbit +cl_int SignBitSet(s::cl_float x) __NOEXC { return std::signbit(x); } +cl_int SignBitSet(s::cl_double x) __NOEXC { return std::signbit(x); } +cl_int __vSignBitSet(s::cl_float x) __NOEXC { return -(std::signbit(x)); } +cl_long __vSignBitSet(s::cl_double x) __NOEXC { return -(std::signbit(x)); } +cl_int SignBitSet(s::cl_half x) __NOEXC { return std::signbit(x); } +cl_short __vSignBitSet(s::cl_half x) __NOEXC { return -(std::signbit(x)); } +MAKE_1V_FUNC(SignBitSet, __vSignBitSet, s::cl_int, s::cl_float) +MAKE_1V_FUNC(SignBitSet, __vSignBitSet, s::cl_long, s::cl_double) +MAKE_1V_FUNC(SignBitSet, __vSignBitSet, s::cl_short, s::cl_half) + +// (Any) // any +MAKE_SR_1V_OR(Any, __Any, s::cl_int, s::cl_char) +MAKE_SR_1V_OR(Any, __Any, s::cl_int, s::cl_short) +MAKE_SR_1V_OR(Any, __Any, s::cl_int, s::cl_int) +MAKE_SR_1V_OR(Any, __Any, s::cl_int, s::cl_long) +MAKE_SR_1V_OR(Any, __Any, s::cl_int, s::longlong) + +// (All) // all +MAKE_SR_1V_AND(All, __All, s::cl_int, s::cl_char) +MAKE_SR_1V_AND(All, __All, s::cl_int, s::cl_short) +MAKE_SR_1V_AND(All, __All, s::cl_int, s::cl_int) +MAKE_SR_1V_AND(All, __All, s::cl_int, s::cl_long) +MAKE_SR_1V_AND(All, __All, s::cl_int, s::longlong) // (bitselect) // Instantiate functions for the scalar types and vector types. @@ -2519,64 +2519,64 @@ MAKE_SC_1V_2V_3V(bitselect, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) MAKE_SC_1V_2V_3V(bitselect, s::cl_half, s::cl_half, s::cl_half, s::cl_half) -// (OpSelect) // select +// (Select) // select // for scalar: result = c ? b : a. // for vector: result[i] = (MSB of c[i] is set)? b[i] : a[i] -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_float, s::cl_int, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_float, s::cl_int, s::cl_float, s::cl_float) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_float, s::cl_uint, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_float, s::cl_uint, s::cl_float, s::cl_float) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_long, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_double, s::cl_long, s::cl_double, s::cl_double) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::cl_ulong, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_double, s::cl_ulong, s::cl_double, s::cl_double) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::longlong, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_double, s::longlong, s::cl_double, s::cl_double) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_double, s::ulonglong, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_double, s::ulonglong, s::cl_double, s::cl_double) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_char, s::cl_char, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_char, s::cl_char, s::cl_char, s::cl_char) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_char, s::cl_uchar, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_char, s::cl_uchar, s::cl_char, s::cl_char) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uchar, s::cl_char, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_uchar, s::cl_char, s::cl_uchar, s::cl_uchar) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uchar, s::cl_uchar, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_uchar, s::cl_uchar, s::cl_uchar, s::cl_uchar) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_short, s::cl_short, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_short, s::cl_short, s::cl_short, s::cl_short) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_short, s::cl_ushort, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_short, s::cl_ushort, s::cl_short, s::cl_short) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ushort, s::cl_short, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_ushort, s::cl_short, s::cl_ushort, s::cl_ushort) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ushort, s::cl_ushort, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_ushort, s::cl_ushort, s::cl_ushort, s::cl_ushort) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_int, s::cl_int, s::cl_int, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_int, s::cl_int, s::cl_int, s::cl_int) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_int, s::cl_uint, s::cl_int, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_int, s::cl_uint, s::cl_int, s::cl_int) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uint, s::cl_int, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_uint, s::cl_int, s::cl_uint, s::cl_uint) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_uint, s::cl_uint, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_uint, s::cl_uint, s::cl_uint, s::cl_uint) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_long, s::cl_long, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_long, s::cl_long, s::cl_long, s::cl_long) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_long, s::cl_ulong, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_long, s::cl_ulong, s::cl_long, s::cl_long) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ulong, s::cl_long, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_ulong, s::cl_long, s::cl_ulong, s::cl_ulong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_ulong, s::cl_ulong, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_ulong, s::cl_ulong, s::cl_ulong, s::cl_ulong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::longlong, s::longlong, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::longlong, s::longlong, s::longlong, s::longlong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::longlong, s::ulonglong, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::longlong, s::ulonglong, s::longlong, s::longlong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::ulonglong, s::ulonglong, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::ulonglong, s::ulonglong, s::ulonglong, s::ulonglong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::ulonglong, s::longlong, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::ulonglong, s::longlong, s::ulonglong, s::ulonglong) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_short, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_half, s::cl_short, s::cl_half, s::cl_half) -MAKE_SC_FSC_1V_2V_3V_FV(OpSelect, __vOpSelect, s::cl_half, s::cl_ushort, +MAKE_SC_FSC_1V_2V_3V_FV(Select, __vSelect, s::cl_half, s::cl_ushort, s::cl_half, s::cl_half) /* --------------- 4.13.3 Native Math functions. Host version ---------------*/ diff --git a/sycl/source/spirv_ops.cpp b/sycl/source/spirv_ops.cpp index 93a587cfe9dd0..10bd15a589947 100644 --- a/sycl/source/spirv_ops.cpp +++ b/sycl/source/spirv_ops.cpp @@ -10,23 +10,20 @@ #include #include -namespace cl { -namespace __spirv { - // This operation is NOP on HOST as all operations there are blocking and // by the moment this function was called, the operations generating -// the OpTypeEvent objects had already been finished. -void OpGroupWaitEvents(int32_t Scope, uint32_t NumEvents, - OpTypeEvent ** WaitEvents) noexcept { +// the __ocl_event_t objects had already been finished. +void __spirv_GroupWaitEvents(int32_t Scope, uint32_t NumEvents, + __ocl_event_t * WaitEvents) noexcept { } -void OpControlBarrier(Scope Execution, Scope Memory, +void __spirv_ControlBarrier(Scope Execution, Scope Memory, uint32_t Semantics) noexcept { throw cl::sycl::runtime_error( "Barrier is not supported on the host device yet."); } -void OpMemoryBarrier(Scope Memory, uint32_t Semantics) noexcept { +void __spirv_MemoryBarrier(Scope Memory, uint32_t Semantics) noexcept { // 1. The 'Memory' parameter is ignored on HOST because there is no memory // separation to global and local there. // 2. The 'Semantics' parameter is ignored because there is no need @@ -34,7 +31,7 @@ void OpMemoryBarrier(Scope Memory, uint32_t Semantics) noexcept { atomic_thread_fence(std::memory_order_seq_cst); } -void prefetch(const char *Ptr, size_t NumBytes) noexcept { +void __spirv_ocl_prefetch(const char *Ptr, size_t NumBytes) noexcept { // TODO: the cache line size may be different. const size_t CacheLineSize = 64; size_t NumCacheLines = @@ -44,6 +41,3 @@ void prefetch(const char *Ptr, size_t NumBytes) noexcept { Ptr += 64; } } - -} // namespace __spirv -} // namespace cl