Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@ float __clc__atomic_fetch_add_float_local_seq_cst(__local float *, float) __asm(
__CLC_ATOMICFADDEXT(float, global)
__CLC_ATOMICFADDEXT(float, local)

_CLC_DECL float
_CLC_DEF float
_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(
__global float *pointer, unsigned int scope, unsigned int semantics,
float value) {
return __spirv_AtomicFAddEXT(pointer, scope, semantics, value);
}

_CLC_DECL float
_CLC_DEF float
_Z21__spirv_AtomicFAddEXTPU3AS3fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(
__local float *pointer, unsigned int scope, unsigned int semantics,
float value) {
Expand Down Expand Up @@ -116,15 +116,15 @@ double __clc__atomic_fetch_add_double_local_seq_cst(
__CLC_ATOMICFADDEXT(double, global)
__CLC_ATOMICFADDEXT(double, local)

_CLC_DECL double
_CLC_DEF double
_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(
__global double *pointer, unsigned int scope, unsigned int semantics,
double value) {
// FIXME: Double-precision atomics must be emulated for __CUDA_ARCH <= sm_50
return __spirv_AtomicFAddEXT(pointer, scope, semantics, value);
}

_CLC_DECL double
_CLC_DEF double
_Z21__spirv_AtomicFAddEXTPU3AS3dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(
__local double *pointer, unsigned int scope, unsigned int semantics,
double value) {
Expand Down
23 changes: 12 additions & 11 deletions libclc/ptx-nvidiacl/libspirv/group/collectives.cl
Original file line number Diff line number Diff line change
Expand Up @@ -369,16 +369,16 @@ __CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, float, -FLT_MAX)
__CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, double, -DBL_MAX)

// half requires additional mangled entry points
_CLC_DECL _CLC_CONVERGENT half _Z17__spirv_GroupFAddjjDF16_(uint scope, uint op,
half x) {
_CLC_DEF _CLC_CONVERGENT half _Z17__spirv_GroupFAddjjDF16_(uint scope, uint op,
half x) {
return __spirv_GroupFAdd(scope, op, x);
}
_CLC_DECL _CLC_CONVERGENT half _Z17__spirv_GroupFMinjjDF16_(uint scope, uint op,
half x) {
_CLC_DEF _CLC_CONVERGENT half _Z17__spirv_GroupFMinjjDF16_(uint scope, uint op,
half x) {
return __spirv_GroupFMin(scope, op, x);
}
_CLC_DECL _CLC_CONVERGENT half _Z17__spirv_GroupFMaxjjDF16_(uint scope, uint op,
half x) {
_CLC_DEF _CLC_CONVERGENT half _Z17__spirv_GroupFMaxjjDF16_(uint scope, uint op,
half x) {
return __spirv_GroupFMax(scope, op, x);
}

Expand Down Expand Up @@ -461,20 +461,21 @@ __CLC_GROUP_BROADCAST(float)
__CLC_GROUP_BROADCAST(double)

// half requires additional mangled entry points
_CLC_DECL _CLC_CONVERGENT half
_CLC_DEF _CLC_CONVERGENT half
_Z17__spirv_GroupBroadcastjDF16_m(uint scope, half x, ulong local_id) {
return __spirv_GroupBroadcast(scope, x, local_id);
}
_CLC_DECL _CLC_CONVERGENT half
_CLC_DEF _CLC_CONVERGENT half
_Z17__spirv_GroupBroadcastjDF16_Dv2_m(uint scope, half x, ulong2 local_id) {
return __spirv_GroupBroadcast(scope, x, local_id);
}
_CLC_DECL _CLC_CONVERGENT half
_CLC_DEF _CLC_CONVERGENT half
_Z17__spirv_GroupBroadcastjDF16_Dv3_m(uint scope, half x, ulong3 local_id) {
return __spirv_GroupBroadcast(scope, x, local_id);
}
_CLC_DECL _CLC_CONVERGENT half
_Z22__spirv_GroupBroadcastjDF16_j(uint scope, half x, uint local_id) {
_CLC_DEF _CLC_CONVERGENT half _Z22__spirv_GroupBroadcastjDF16_j(uint scope,
half x,
uint local_id) {
return __spirv_GroupBroadcast(scope, x, (ulong)local_id);
}

Expand Down
24 changes: 12 additions & 12 deletions libclc/ptx-nvidiacl/libspirv/images/image.cl
Original file line number Diff line number Diff line change
Expand Up @@ -319,15 +319,15 @@ _DEFINE_READ_3D_PIXELF(16, clamp)

// Unsampled images
#define _CLC_DEFINE_IMAGE1D_READ_BUILTIN(elem_t, elem_t_mangled, elem_size) \
_CLC_DECL \
_CLC_DEF \
elem_t##4 _Z17__spirv_ImageReadIDv4_##elem_t_mangled##14ocl_image1d_roiET_T0_T1_( \
read_only image1d_t image, int x) { \
return out_##elem_t( \
__nvvm_suld_1d_v4i##elem_size##_clamp(image, x * sizeof(elem_t##4))); \
}

#define _CLC_DEFINE_IMAGE2D_READ_BUILTIN(elem_t, elem_t_mangled, elem_size) \
_CLC_DECL \
_CLC_DEF \
elem_t##4 _Z17__spirv_ImageReadIDv4_##elem_t_mangled##14ocl_image2d_roDv2_iET_T0_T1_( \
read_only image2d_t image, int2 coord) { \
return out_##elem_t(__nvvm_suld_2d_v4i##elem_size##_clamp( \
Expand All @@ -336,7 +336,7 @@ _DEFINE_READ_3D_PIXELF(16, clamp)

#define _CLC_DEFINE_IMAGE3D_READ_BUILTIN(elem_t, elem_t_mangled, elem_size, \
coord_mangled) \
_CLC_DECL \
_CLC_DEF \
elem_t##4 _Z17__spirv_ImageReadIDv4_##elem_t_mangled##14ocl_image3d_ro##coord_mangled##ET_T0_T1_( \
read_only image3d_t image, int4 coord) { \
return out_##elem_t(__nvvm_suld_3d_v4i##elem_size##_clamp( \
Expand All @@ -345,7 +345,7 @@ _DEFINE_READ_3D_PIXELF(16, clamp)

#define _CLC_DEFINE_IMAGE1D_WRITE_BUILTIN(elem_t, elem_t_mangled, elem_size, \
int_rep) \
_CLC_DECL void \
_CLC_DEF void \
_Z18__spirv_ImageWriteI14ocl_image1d_woiDv4_##elem_t_mangled##EvT_T0_T1_( \
write_only image1d_t image, int x, elem_t##4 c) { \
__nvvm_sust_1d_v4i##elem_size##_clamp( \
Expand All @@ -355,7 +355,7 @@ _DEFINE_READ_3D_PIXELF(16, clamp)

#define _CLC_DEFINE_IMAGE2D_WRITE_BUILTIN(elem_t, elem_t_mangled, elem_size, \
int_rep) \
_CLC_DECL void \
_CLC_DEF void \
_Z18__spirv_ImageWriteI14ocl_image2d_woDv2_iDv4_##elem_t_mangled##EvT_T0_T1_( \
write_only image2d_t image, int2 coord, elem_t##4 c) { \
__nvvm_sust_2d_v4i##elem_size##_clamp( \
Expand All @@ -365,7 +365,7 @@ _DEFINE_READ_3D_PIXELF(16, clamp)

#define _CLC_DEFINE_IMAGE3D_WRITE_BUILTIN(elem_t, elem_t_mangled, elem_size, \
int_rep, val_mangled) \
_CLC_DECL void \
_CLC_DEF void \
_Z18__spirv_ImageWriteI14ocl_image3d_woDv4_i##val_mangled##EvT_T0_T1_( \
write_only image3d_t image, int4 coord, elem_t##4 c) { \
__nvvm_sust_3d_v4i##elem_size##_clamp( \
Expand Down Expand Up @@ -414,7 +414,7 @@ _CLC_DEFINE_IMAGE3D_WRITE_BUILTIN(half, DF16_, 16, short, Dv4_DF16_)

// Sampled images
#define _CLC_DEFINE_SAMPLED_IMAGE_BUILTIN(dims) \
_CLC_DECL __ocl_sampled_image##dims##d_ro_t \
_CLC_DEF __ocl_sampled_image##dims##d_ro_t \
_Z20__spirv_SampledImageI14ocl_image##dims##d_ro32__spirv_SampledImage__image##dims##d_roET0_T_11ocl_sampler( \
read_only image##dims##d_t image, sampler_t sampler) { \
return __clc__sampled_image##dims##d_pack(image, sampler); \
Expand Down Expand Up @@ -899,7 +899,7 @@ _DEFINE_SAMPLED_LOADS(half, 16)
#define _CLC_DEFINE_IMAGE_SAMPLED_READ_BUILTIN( \
elem_t, elem_t_mangled, dims, input_coord_t, input_coord_t_mangled, \
sampling_coord_t) \
_CLC_DECL \
_CLC_DEF \
elem_t##4 _Z30__spirv_ImageSampleExplicitLodI32__spirv_SampledImage__image##dims##d_roDv4_##elem_t_mangled##input_coord_t_mangled##ET0_T_T1_if( \
__ocl_sampled_image##dims##d_ro_t sampled_image, \
input_coord_t input_coord, int operands, float lod) { \
Expand Down Expand Up @@ -973,22 +973,22 @@ _CLC_DEFINE_IMAGE_SAMPLED_READ_BUILTIN(half, DF16_, 3, int4, Dv4_i, float4)
#undef _CLC_DEFINE_IMAGE_SAMPLED_READ_BUILTIN

// Size Queries
_CLC_DECL int _Z22__spirv_ImageQuerySizeIDv1_i14ocl_image1d_roET_T0_(
_CLC_DEF int _Z22__spirv_ImageQuerySizeIDv1_i14ocl_image1d_roET_T0_(
read_only image1d_t image) {
return __nvvm_suq_width_1i(image);
}

_CLC_DECL int2 _Z22__spirv_ImageQuerySizeIDv2_i14ocl_image2d_roET_T0_(
_CLC_DEF int2 _Z22__spirv_ImageQuerySizeIDv2_i14ocl_image2d_roET_T0_(
read_only image2d_t image) {
int width = __nvvm_suq_width_2i(image);
int height = __nvvm_suq_height_2i(image);
return (int2)(width, height);
}

_CLC_DECL int3 _Z22__spirv_ImageQuerySizeIDv3_i14ocl_image3d_roET_T0_(
_CLC_DEF int3 _Z22__spirv_ImageQuerySizeIDv3_i14ocl_image3d_roET_T0_(
read_only image3d_t image) {
int width = __nvvm_suq_width_3i(image);
int height = __nvvm_suq_height_3i(image);
int depth = __nvvm_suq_depth_3i(image);
return (int3)(width, height, depth);
}
}