diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cu.cc b/tensorflow/core/kernels/fused_batch_norm_op.cu.cc
index 7cf6b44570962a..0da493b40c54f7 100644
--- a/tensorflow/core/kernels/fused_batch_norm_op.cu.cc
+++ b/tensorflow/core/kernels/fused_batch_norm_op.cu.cc
@@ -50,7 +50,12 @@ __global__ void InvVarianceToVarianceKernel(int nthreads, double epsilon,
                                             int sample_size, T* variance) {
   GPU_1D_KERNEL_LOOP(index, nthreads) {
     T inv_var = variance[index];
+#if GOOGLE_CUDA
     T var = __fdividef(1, inv_var * inv_var) - T(epsilon);
+#elif TENSORFLOW_USE_ROCM
+    // TODO: fix this in ROCDL or LC; until then ROCm uses plain division.
+    T var = 1 / (inv_var * inv_var) - T(epsilon);
+#endif
     // This is for Bessel's correction
     var *= T(sample_size) / T((sample_size > 1) ? sample_size - 1 : 1);
     variance[index] = (var > 0) ? var : 0;