Skip to content

Commit 8001a0c

Browse files
whchungsunway513
authored and committed
Merge pull request #25 from ROCmSoftwarePlatform/fix_fdividef
fix hcc linking error caused by __fdividef
1 parent 5469908 commit 8001a0c

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

tensorflow/core/kernels/fused_batch_norm_op.cu.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,12 @@ __global__ void InvVarianceToVarianceKernel(int nthreads, double epsilon,
5050
int sample_size, T* variance) {
5151
GPU_1D_KERNEL_LOOP(index, nthreads) {
5252
T inv_var = variance[index];
53+
#if GOOGLE_CUDA
5354
T var = __fdividef(1, inv_var * inv_var) - T(epsilon);
55+
// TODO: fix this in ROCDL or LC
56+
#elif TENSORFLOW_USE_ROCM
57+
T var = 1 / (inv_var * inv_var) - T(epsilon);
58+
#endif
5459
// This is for Bessel's correction
5560
var *= T(sample_size) / T((sample_size > 1) ? sample_size - 1 : 1);
5661
variance[index] = (var > 0) ? var : 0;

0 commit comments

Comments
 (0)