-
Notifications
You must be signed in to change notification settings - Fork 73
Closed
Description
Calling the h2rcp() in rocm5.6 looks like it's converting the underlying storage as a short into a float and doing the reciprocal on that. Instead of 1/4.0=0.25, it produces 0.000057.
I tested this with gfx1010 in the docker image rocm/dev-ubuntu-20.04:5.6-complete but targeting gfx1030 gives an identical kernel disassembly so the same error should happen.
#include <stdio.h>
#include <hip/hip_runtime.h>
#include <hip/hip_fp16.h>
__device__ __forceinline__ __half2 __alternate_h2rcp(__half2 x) {
return _Float16_2{static_cast<_Float16>(__builtin_amdgcn_rcph(static_cast<__half2_raw>(x).data.x)),
static_cast<_Float16>(__builtin_amdgcn_rcph(static_cast<__half2_raw>(x).data.y))};
}
__global__ void do_rcp(half2* result, const half2* source)
{
result[0] = h2rcp(source[0]);
result[1] = __alternate_h2rcp(source[1]);
}
int main(int argc, char *argv[])
{
half2 *src_d, *result_d;
half2 *src_h, *result_h;
size_t N = 2;
size_t Nbytes = N * sizeof(half2);
src_h = (half2*)malloc(Nbytes);
result_h = (half2*)malloc(Nbytes);
src_h[0] = __floats2half2_rn(4.0f, 9.0f);
src_h[1] = __floats2half2_rn(4.0f, 9.0f);
hipMalloc(&src_d, Nbytes);
hipMalloc(&result_d, Nbytes);
hipMemcpy(src_d, src_h, Nbytes, hipMemcpyHostToDevice);
hipLaunchKernelGGL(do_rcp, dim3(1), dim3(1), 0, 0,
result_d, src_d);
hipMemcpy(result_h, result_d, Nbytes, hipMemcpyDeviceToHost);
printf("rocm: 1/%f = %f\n", __low2float(src_h[0]), __low2float(result_h[0]));
printf("rocm: 1/%f = %f\n", __high2float(src_h[0]), __high2float(result_h[0]));
printf("alternate: 1/%f = %f\n", __low2float(src_h[1]), __low2float(result_h[1]));
printf("alternate: 1/%f = %f\n", __high2float(src_h[1]), __high2float(result_h[1]));
}
Metadata
Metadata
Assignees
Labels
No labels