Skip to content

Commit

Permalink
Cuda half macros cleanup (#10147)
Browse files Browse the repository at this point in the history
Summary:
This PR removes a couple of macros throughout TH* as part of the refactoring effort for ATen. Removing these macros should avoid confusion among developers who are trying to move things from TH* to ATen. This PR is part of the THCNumerics deprecation that I have been working on as a follow-up to mruberry's pytorch/pytorch#9318. I am separating these two commits to see whether removing these macros upsets the PyTorch public CI or internal builds.

- Commit pytorch/pytorch@1248de7 removes the code paths guarded by the `CUDA_HALF_INSTRUCTIONS` macro. Since the macro was removed in commit pytorch/pytorch@2f186df, `#ifdef CUDA_HALF_INSTRUCTIONS` would evaluate to false, so the code path kept after this change is the one for the false case of `#ifdef CUDA_HALF_INSTRUCTIONS`.

- Commit pytorch/pytorch@520c99b removes the code paths guarded by the `CUDA_HALF_TENSOR` macro. Because PyTorch now supports only CUDA 8.0 and above, `CUDA_HALF_TENSOR` is always defined, since CUDA 8.0 satisfies `CUDA_HAS_FP16`; hence the code path kept after this change is the one for the true case of `#ifdef CUDA_HALF_TENSOR`.
Pull Request resolved: pytorch/pytorch#10147

Differential Revision: D9345940

Pulled By: soumith

fbshipit-source-id: c9392261dd432d304f1cdaf961760cbd164a59d0
  • Loading branch information
syed-ahmed authored and facebook-github-bot committed Aug 15, 2018
1 parent 86363e1 commit 5adcac3
Show file tree
Hide file tree
Showing 28 changed files with 1 addition and 377 deletions.
2 changes: 0 additions & 2 deletions aten/src/THC/THCAtomics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ static inline __device__ void atomicAdd(int64_t *address, int64_t val) {
AtomicAddIntegerImpl<int64_t, sizeof(int64_t)>()(address, val);
}

#ifdef CUDA_HALF_TENSOR
static inline __device__ void atomicAdd(half *address, half val) {
unsigned int * address_as_ui =
(unsigned int *) ((char *)address - ((size_t)address & 2));
Expand All @@ -122,7 +121,6 @@ static inline __device__ void atomicAdd(half *address, half val) {
static inline __device__ void atomicAdd(at::Half *address, half val) {
return atomicAdd(reinterpret_cast<half*>(address), val);
}
#endif

#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600 || CUDA_VERSION < 8000)
// from CUDA C Programmic Guide
Expand Down
4 changes: 0 additions & 4 deletions aten/src/THC/THCBlas.cu
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ double THCudaBlas_Ddot(THCState *state, int64_t n, double *x, int64_t incx, doub
return 0;
}

#ifdef CUDA_HALF_TENSOR
half THCudaBlas_Hdot(THCState *state, int64_t n, half *x, int64_t incx, half *y, int64_t incy)
{
#if CUDA_VERSION >= 8000
Expand Down Expand Up @@ -77,7 +76,6 @@ half THCudaBlas_Hdot(THCState *state, int64_t n, half *x, int64_t incx, half *y,
return THC_float2half(0);
#endif
}
#endif

/* Level 2 */

Expand Down Expand Up @@ -262,7 +260,6 @@ void THCudaBlas_Sgemm(THCState *state, char transa, char transb, int64_t m, int6
"with the bound [val] <= %d", INT_MAX);
}

#ifdef CUDA_HALF_TENSOR
// In CUDA 8.0, definition of data types for sgemmex changed
#if CUDA_VERSION < 8000
# define CUDA_R_16F CUBLAS_DATA_HALF
Expand Down Expand Up @@ -321,7 +318,6 @@ void THCudaBlas_Hgemm(THCState *state, char transa, char transb, int64_t m, int6
THError("Cublas_Hgemm only supports m, n, k, lda, ldb, ldc"
"with th bound [val] <= %d", INT_MAX);
}
#endif

void THCudaBlas_Dgemm(THCState *state, char transa, char transb, int64_t m, int64_t n, int64_t k, double alpha, double *a, int64_t lda, double *b, int64_t ldb, double beta, double *c, int64_t ldc)
{
Expand Down
4 changes: 0 additions & 4 deletions aten/src/THC/THCBlas.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
/* Level 1 */
THC_API float THCudaBlas_Sdot(THCState *state, int64_t n, float *x, int64_t incx, float *y, int64_t incy);
THC_API double THCudaBlas_Ddot(THCState *state, int64_t n, double *x, int64_t incx, double *y, int64_t incy);
#ifdef CUDA_HALF_TENSOR
THC_API half THCudaBlas_Hdot(THCState *state, int64_t n, half *x, int64_t incx, half *y, int64_t incy);
#endif

/* Level 2 */
THC_API void THCudaBlas_Sgemv(THCState *state, char trans, int64_t m, int64_t n, float alpha, float *a, int64_t lda, float *x, int64_t incx, float beta, float *y, int64_t incy);
Expand All @@ -21,9 +19,7 @@ THC_API void THCudaBlas_Dger(THCState *state, int64_t m, int64_t n, double alpha
THC_API void THCudaBlas_Sgemm(THCState *state, char transa, char transb, int64_t m, int64_t n, int64_t k, float alpha, float *a, int64_t lda, float *b, int64_t ldb, float beta, float *c, int64_t ldc);
THC_API void THCudaBlas_Dgemm(THCState *state, char transa, char transb, int64_t m, int64_t n, int64_t k, double alpha, double *a, int64_t lda, double *b, int64_t ldb, double beta, double *c, int64_t ldc);

#ifdef CUDA_HALF_TENSOR
THC_API void THCudaBlas_Hgemm(THCState *state, char transa, char transb, int64_t m, int64_t n, int64_t k, half alpha, half *a, int64_t lda, half *b, int64_t ldb, half beta, half *c, int64_t ldc);
#endif

THC_API void THCudaBlas_SgemmBatched(THCState *state, char transa, char transb, int64_t m, int64_t n, int64_t k,
float alpha, const float *a[], int64_t lda, const float *b[], int64_t ldb,
Expand Down
10 changes: 0 additions & 10 deletions aten/src/THC/THCGenerateHalfType.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,11 @@

#include "THCHalf.h"

#if defined(CUDA_HALF_TENSOR) || defined(FORCE_TH_HALF)

#define real half
#define accreal float
#define Real Half

// if only here via FORCE_TH_HALF, don't define CReal since
// FORCE_TH_HALF should only be used for TH types
#ifdef CUDA_HALF_TENSOR
#define CReal CudaHalf
#endif

#define THC_REAL_IS_HALF
#line 1 THC_GENERIC_FILE
Expand All @@ -23,14 +17,10 @@
#undef accreal
#undef Real

#ifdef CUDA_HALF_TENSOR
#undef CReal
#endif

#undef THC_REAL_IS_HALF

#endif // defined(CUDA_HALF_TENSOR) || defined(FORCE_TH_HALF)

#ifndef THCGenerateAllTypes
#ifndef THCGenerateFloatTypes
#undef THC_GENERIC_FILE
Expand Down
9 changes: 0 additions & 9 deletions aten/src/THC/THCHalf.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,6 @@

#include "THCGeneral.h"

/* We compile with CudaHalfTensor support if we have this: */
#if CUDA_VERSION >= 7050 || CUDA_HAS_FP16 || defined(__HIP_PLATFORM_HCC__)
#define CUDA_HALF_TENSOR 1
#endif

#ifdef CUDA_HALF_TENSOR

#include <cuda_fp16.h>
#include <stdint.h>

Expand All @@ -30,6 +23,4 @@ THC_API int THC_nativeHalfInstructions(THCState *state);
/* Check for performant native fp16 support on the current device */
THC_API int THC_fastHalfInstructions(THCState *state);

#endif /* CUDA_HALF_TENSOR */

#endif
Loading

0 comments on commit 5adcac3

Please sign in to comment.