Skip to content

Commit 08f9981

Browse files
Merge pull request #1 from aravindhbalaji1985/aravindhbalaji1985-patch-1
Update gpu_prim.h
2 parents fbfb80b + 28c32ab commit 08f9981

File tree

1 file changed

+4
-7
lines changed

1 file changed

+4
-7
lines changed

tensorflow/core/kernels/gpu_prim.h

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -44,10 +44,9 @@ __device__ __forceinline__ void ThreadStoreVolatilePtr<Eigen::half>(
44 | 44
Eigen::numext::bit_cast<uint16_t>(val);
45 | 45
}
46 | 46

47-
template <>
48-
__device__ __forceinline__ Eigen::half ThreadLoadVolatilePointer<Eigen::half>(
47+
__device__ __forceinline__ Eigen::half ThreadLoadVolatilePointer(
49 | 48
Eigen::half *ptr, Int2Type<true> /*is_primitive*/) {
50-
uint16_t result = *reinterpret_cast<volatile uint16_t *>(ptr);
49+
const uint16_t result = *reinterpret_cast<volatile const uint16_t *>(ptr);
51 | 50
return Eigen::numext::bit_cast<Eigen::half>(result);
52 | 51
}
53 | 52

@@ -59,10 +58,8 @@ __device__ __forceinline__ void ThreadStoreVolatilePtr<Eigen::bfloat16>(
59 | 58
Eigen::numext::bit_cast<uint16_t>(val);
60 | 59
}
61 | 60

62-
template <>
63-
__device__ __forceinline__ Eigen::bfloat16
64-
ThreadLoadVolatilePointer<Eigen::bfloat16>(Eigen::bfloat16 *ptr,
65-
Int2Type<true> /*is_primitive*/) {
61+
__device__ __forceinline__ Eigen::bfloat16 ThreadLoadVolatilePointer(
62+
Eigen::bfloat16 *ptr, Int2Type<true> /*is_primitive*/) {
66 | 63
uint16_t result = *reinterpret_cast<volatile uint16_t *>(ptr);
67 | 64
return Eigen::numext::bit_cast<Eigen::bfloat16>(result);
68 | 65
}

0 commit comments

Comments
 (0)