Skip to content

Commit

Permalink
Cuda half macros cleanup (#10147)
Browse files Browse the repository at this point in the history
Summary:
This PR removes a couple of macros throughout TH* as part of the refactoring effort for ATen. Removing these macros should avoid confusion among developers who are trying to move things from TH* to ATen. This PR is part of the THCNumerics deprecation that I have been working on as a follow-up to mruberry's pytorch/pytorch#9318. I am separating these two commits to see whether removing these macros upsets the PyTorch public CI or internal builds.

- Commit pytorch/pytorch@1248de7 removes the code paths guarded by the `CUDA_HALF_INSTRUCTIONS` macro. Since the macro was removed in commit pytorch/pytorch@2f186df, `#ifdef CUDA_HALF_INSTRUCTIONS` would evaluate to false, so the code path kept after this change is the one for the false case of `#ifdef CUDA_HALF_INSTRUCTIONS`.

- Commit pytorch/pytorch@520c99b removes the code paths guarded by the `CUDA_HALF_TENSOR` macro. Because PyTorch now supports only CUDA 8.0 and above, `CUDA_HALF_TENSOR` is always defined, since CUDA 8.0 satisfies `CUDA_HAS_FP16`; hence the code path kept after this change is the one for the true case of `#ifdef CUDA_HALF_TENSOR`.
Pull Request resolved: pytorch/pytorch#10147

Differential Revision: D9345940

Pulled By: soumith

fbshipit-source-id: c9392261dd432d304f1cdaf961760cbd164a59d0
  • Loading branch information
syed-ahmed authored and facebook-github-bot committed Aug 15, 2018
1 parent 86363e1 commit 5adcac3
Show file tree
Hide file tree
Showing 28 changed files with 1 addition and 377 deletions.
2 changes: 0 additions & 2 deletions aten/src/THC/THCAtomics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ static inline __device__ void atomicAdd(int64_t *address, int64_t val) {
AtomicAddIntegerImpl<int64_t, sizeof(int64_t)>()(address, val);
}

#ifdef CUDA_HALF_TENSOR
static inline __device__ void atomicAdd(half *address, half val) {
unsigned int * address_as_ui =
(unsigned int *) ((char *)address - ((size_t)address & 2));
Expand All @@ -122,7 +121,6 @@ static inline __device__ void atomicAdd(half *address, half val) {
static inline __device__ void atomicAdd(at::Half *address, half val) {
return atomicAdd(reinterpret_cast<half*>(address), val);
}
#endif

#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600 || CUDA_VERSION < 8000)
// from CUDA C Programmic Guide
Expand Down
4 changes: 0 additions & 4 deletions aten/src/THC/THCBlas.cu
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ double THCudaBlas_Ddot(THCState *state, int64_t n, double *x, int64_t incx, doub
return 0;
}

#ifdef CUDA_HALF_TENSOR
half THCudaBlas_Hdot(THCState *state, int64_t n, half *x, int64_t incx, half *y, int64_t incy)
{
#if CUDA_VERSION >= 8000
Expand Down Expand Up @@ -77,7 +76,6 @@ half THCudaBlas_Hdot(THCState *state, int64_t n, half *x, int64_t incx, half *y,
return THC_float2half(0);
#endif
}
#endif

/* Level 2 */

Expand Down Expand Up @@ -262,7 +260,6 @@ void THCudaBlas_Sgemm(THCState *state, char transa, char transb, int64_t m, int6
"with the bound [val] <= %d", INT_MAX);
}

#ifdef CUDA_HALF_TENSOR
// In CUDA 8.0, definition of data types for sgemmex changed
#if CUDA_VERSION < 8000
# define CUDA_R_16F CUBLAS_DATA_HALF
Expand Down Expand Up @@ -321,7 +318,6 @@ void THCudaBlas_Hgemm(THCState *state, char transa, char transb, int64_t m, int6
THError("Cublas_Hgemm only supports m, n, k, lda, ldb, ldc"
"with th bound [val] <= %d", INT_MAX);
}
#endif

void THCudaBlas_Dgemm(THCState *state, char transa, char transb, int64_t m, int64_t n, int64_t k, double alpha, double *a, int64_t lda, double *b, int64_t ldb, double beta, double *c, int64_t ldc)
{
Expand Down
4 changes: 0 additions & 4 deletions aten/src/THC/THCBlas.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
/* Level 1 */
THC_API float THCudaBlas_Sdot(THCState *state, int64_t n, float *x, int64_t incx, float *y, int64_t incy);
THC_API double THCudaBlas_Ddot(THCState *state, int64_t n, double *x, int64_t incx, double *y, int64_t incy);
#ifdef CUDA_HALF_TENSOR
THC_API half THCudaBlas_Hdot(THCState *state, int64_t n, half *x, int64_t incx, half *y, int64_t incy);
#endif

/* Level 2 */
THC_API void THCudaBlas_Sgemv(THCState *state, char trans, int64_t m, int64_t n, float alpha, float *a, int64_t lda, float *x, int64_t incx, float beta, float *y, int64_t incy);
Expand All @@ -21,9 +19,7 @@ THC_API void THCudaBlas_Dger(THCState *state, int64_t m, int64_t n, double alpha
THC_API void THCudaBlas_Sgemm(THCState *state, char transa, char transb, int64_t m, int64_t n, int64_t k, float alpha, float *a, int64_t lda, float *b, int64_t ldb, float beta, float *c, int64_t ldc);
THC_API void THCudaBlas_Dgemm(THCState *state, char transa, char transb, int64_t m, int64_t n, int64_t k, double alpha, double *a, int64_t lda, double *b, int64_t ldb, double beta, double *c, int64_t ldc);

#ifdef CUDA_HALF_TENSOR
THC_API void THCudaBlas_Hgemm(THCState *state, char transa, char transb, int64_t m, int64_t n, int64_t k, half alpha, half *a, int64_t lda, half *b, int64_t ldb, half beta, half *c, int64_t ldc);
#endif

THC_API void THCudaBlas_SgemmBatched(THCState *state, char transa, char transb, int64_t m, int64_t n, int64_t k,
float alpha, const float *a[], int64_t lda, const float *b[], int64_t ldb,
Expand Down
10 changes: 0 additions & 10 deletions aten/src/THC/THCGenerateHalfType.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,11 @@

#include "THCHalf.h"

#if defined(CUDA_HALF_TENSOR) || defined(FORCE_TH_HALF)

#define real half
#define accreal float
#define Real Half

// if only here via FORCE_TH_HALF, don't define CReal since
// FORCE_TH_HALF should only be used for TH types
#ifdef CUDA_HALF_TENSOR
#define CReal CudaHalf
#endif

#define THC_REAL_IS_HALF
#line 1 THC_GENERIC_FILE
Expand All @@ -23,14 +17,10 @@
#undef accreal
#undef Real

#ifdef CUDA_HALF_TENSOR
#undef CReal
#endif

#undef THC_REAL_IS_HALF

#endif // defined(CUDA_HALF_TENSOR) || defined(FORCE_TH_HALF)

#ifndef THCGenerateAllTypes
#ifndef THCGenerateFloatTypes
#undef THC_GENERIC_FILE
Expand Down
9 changes: 0 additions & 9 deletions aten/src/THC/THCHalf.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,6 @@

#include "THCGeneral.h"

/* We compile with CudaHalfTensor support if we have this: */
#if CUDA_VERSION >= 7050 || CUDA_HAS_FP16 || defined(__HIP_PLATFORM_HCC__)
#define CUDA_HALF_TENSOR 1
#endif

#ifdef CUDA_HALF_TENSOR

#include <cuda_fp16.h>
#include <stdint.h>

Expand All @@ -30,6 +23,4 @@ THC_API int THC_nativeHalfInstructions(THCState *state);
/* Check for performant native fp16 support on the current device */
THC_API int THC_fastHalfInstructions(THCState *state);

#endif /* CUDA_HALF_TENSOR */

#endif
Loading

0 comments on commit 5adcac3

Please sign in to comment.