Skip to content

Commit

Permalink
[ROCM] update fluid platform for rocm35 (part1), test=develop (#30639)
Browse files Browse the repository at this point in the history
* [ROCM] update fluid platform for rocm35 (part1), test=develop

* address review comments, test=develop
  • Loading branch information
qili93 authored Jan 28, 2021
1 parent fc00240 commit f89da4a
Show file tree
Hide file tree
Showing 19 changed files with 626 additions and 152 deletions.
10 changes: 10 additions & 0 deletions paddle/fluid/platform/bfloat16.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,17 @@ struct PADDLE_ALIGN(2) bfloat16 {
~bfloat16() = default;

HOSTDEVICE inline explicit bfloat16(float val) {
#ifdef PADDLE_WITH_HIP
uint32_t res = 0;
uint32_t* tempRes;
// We should be using memcpy in order to respect the strict aliasing rule
// but it fails in the HIP environment.
tempRes = reinterpret_cast<uint32_t*>(&val);
res = *tempRes;
x = res >> 16;
#else
std::memcpy(&x, reinterpret_cast<char*>(&val) + 2, 2);
#endif
}

template <class T>
Expand Down
43 changes: 28 additions & 15 deletions paddle/fluid/platform/complex128.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
#include <thrust/complex.h>
#endif // PADDLE_WITH_CUDA

#ifdef PADDLE_WITH_HIP
#include <hip/hip_complex.h>
#include <thrust/complex.h> // NOLINT
#endif

#include <cstring>

#include "paddle/fluid/platform/hostdevice.h"
Expand All @@ -54,7 +59,7 @@ struct PADDLE_ALIGN(16) complex128 {
~complex128() = default;

HOSTDEVICE complex128(double real, double imag) : real(real), imag(imag) {}
#if defined(PADDLE_WITH_CUDA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

HOSTDEVICE inline explicit complex128(const thrust::complex<double>& c) {
real = c.real();
Expand All @@ -65,9 +70,15 @@ struct PADDLE_ALIGN(16) complex128 {
return thrust::complex<double>(real, imag);
}

#ifdef PADDLE_WITH_HIP
HOSTDEVICE inline explicit operator hipDoubleComplex() const {
return make_hipDoubleComplex(real, imag);
}
#else
HOSTDEVICE inline explicit operator cuDoubleComplex() const {
return make_cuDoubleComplex(real, imag);
}
#endif
#endif

HOSTDEVICE complex128(const float& val)
Expand Down Expand Up @@ -202,7 +213,7 @@ struct PADDLE_ALIGN(16) complex128 {

HOSTDEVICE inline complex128 operator+(const complex128& a,
const complex128& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex128(thrust::complex<double>(a.real, a.imag) +
thrust::complex<double>(b.real, b.imag));
#else
Expand All @@ -212,7 +223,7 @@ HOSTDEVICE inline complex128 operator+(const complex128& a,

HOSTDEVICE inline complex128 operator-(const complex128& a,
const complex128& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex128(thrust::complex<double>(a.real, a.imag) -
thrust::complex<double>(b.real, b.imag));
#else
Expand All @@ -222,7 +233,7 @@ HOSTDEVICE inline complex128 operator-(const complex128& a,

HOSTDEVICE inline complex128 operator*(const complex128& a,
const complex128& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex128(thrust::complex<double>(a.real, a.imag) *
thrust::complex<double>(b.real, b.imag));
#else
Expand All @@ -233,7 +244,7 @@ HOSTDEVICE inline complex128 operator*(const complex128& a,

HOSTDEVICE inline complex128 operator/(const complex128& a,
const complex128& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex128(thrust::complex<double>(a.real, a.imag) /
thrust::complex<double>(b.real, b.imag));
#else
Expand All @@ -244,7 +255,7 @@ HOSTDEVICE inline complex128 operator/(const complex128& a,
}

HOSTDEVICE inline complex128 operator-(const complex128& a) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex128(-thrust::complex<double>(a.real, a.imag));
#else
complex128 res;
Expand All @@ -256,7 +267,7 @@ HOSTDEVICE inline complex128 operator-(const complex128& a) {

HOSTDEVICE inline complex128& operator+=(complex128& a, // NOLINT
const complex128& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
a = complex128(thrust::complex<double>(a.real, a.imag) +=
thrust::complex<double>(b.real, b.imag));
return a;
Expand All @@ -269,7 +280,7 @@ HOSTDEVICE inline complex128& operator+=(complex128& a, // NOLINT

HOSTDEVICE inline complex128& operator-=(complex128& a, // NOLINT
const complex128& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
a = complex128(thrust::complex<double>(a.real, a.imag) -=
thrust::complex<double>(b.real, b.imag));
return a;
Expand All @@ -282,7 +293,7 @@ HOSTDEVICE inline complex128& operator-=(complex128& a, // NOLINT

HOSTDEVICE inline complex128& operator*=(complex128& a, // NOLINT
const complex128& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
a = complex128(thrust::complex<double>(a.real, a.imag) *=
thrust::complex<double>(b.real, b.imag));
return a;
Expand All @@ -295,7 +306,7 @@ HOSTDEVICE inline complex128& operator*=(complex128& a, // NOLINT

HOSTDEVICE inline complex128& operator/=(complex128& a, // NOLINT
const complex128& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
a = complex128(thrust::complex<double>(a.real, a.imag) /=
thrust::complex<double>(b.real, b.imag));
return a;
Expand Down Expand Up @@ -339,6 +350,7 @@ HOSTDEVICE inline bool operator>=(const complex128& a, const complex128& b) {

HOSTDEVICE inline bool(isnan)(const complex128& a) {
#if defined(__CUDA_ARCH__)
// __isnanf not supported on HIP platform
return __isnan(a.real) || __isnan(a.imag);
#else
return std::isnan(a.real) || std::isnan(a.imag);
Expand All @@ -347,6 +359,7 @@ HOSTDEVICE inline bool(isnan)(const complex128& a) {

HOSTDEVICE inline bool(isinf)(const complex128& a) {
#if defined(__CUDA_ARCH__)
// __isinf not supported on HIP platform
return __isinf(a.real) || __isinf(a.imag);
#else
return std::isinf(a.real) || std::isinf(a.imag);
Expand All @@ -358,15 +371,15 @@ HOSTDEVICE inline bool(isfinite)(const complex128& a) {
}

HOSTDEVICE inline double(abs)(const complex128& a) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return thrust::abs(thrust::complex<double>(a.real, a.imag));
#else
return std::abs(std::complex<double>(a.real, a.imag));
#endif
}

HOSTDEVICE inline complex128(pow)(const complex128& a, const complex128& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex128(thrust::pow(thrust::complex<double>(a.real, a.imag),
thrust::complex<double>(b.real, b.imag)));
#else
Expand All @@ -375,23 +388,23 @@ HOSTDEVICE inline complex128(pow)(const complex128& a, const complex128& b) {
}

HOSTDEVICE inline complex128(sqrt)(const complex128& a) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex128(thrust::sqrt(thrust::complex<double>(a.real, a.imag)));
#else
return std::sqrt(std::complex<double>(a));
#endif
}

HOSTDEVICE inline complex128(tanh)(const complex128& a) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex128(thrust::tanh(thrust::complex<double>(a.real, a.imag)));
#else
return std::tanh(std::complex<double>(a));
#endif
}

HOSTDEVICE inline complex128(log)(const complex128& a) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex128(thrust::log(thrust::complex<double>(a.real, a.imag)));
#else
return complex128(std::log(std::complex<double>(a)));
Expand Down
43 changes: 28 additions & 15 deletions paddle/fluid/platform/complex64.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@
#include <thrust/complex.h>
#endif // PADDLE_WITH_CUDA

#ifdef PADDLE_WITH_HIP
#include <hip/hip_complex.h>
#include <thrust/complex.h> // NOLINT
#endif

#include <cstring>

#include "paddle/fluid/platform/complex128.h"
Expand Down Expand Up @@ -54,7 +59,7 @@ struct PADDLE_ALIGN(8) complex64 {
~complex64() = default;

HOSTDEVICE complex64(float real, float imag) : real(real), imag(imag) {}
#if defined(PADDLE_WITH_CUDA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

HOSTDEVICE inline explicit complex64(const thrust::complex<float>& c) {
real = c.real();
Expand All @@ -65,9 +70,15 @@ struct PADDLE_ALIGN(8) complex64 {
return thrust::complex<float>(real, imag);
}

#ifdef PADDLE_WITH_HIP
HOSTDEVICE inline explicit operator hipFloatComplex() const {
return make_hipFloatComplex(real, imag);
}
#else
HOSTDEVICE inline explicit operator cuFloatComplex() const {
return make_cuFloatComplex(real, imag);
}
#endif
#endif

HOSTDEVICE complex64(const float& val) : real(val), imag(0) {}
Expand Down Expand Up @@ -207,7 +218,7 @@ struct PADDLE_ALIGN(8) complex64 {
};

HOSTDEVICE inline complex64 operator+(const complex64& a, const complex64& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex64(thrust::complex<float>(a.real, a.imag) +
thrust::complex<float>(b.real, b.imag));
#else
Expand All @@ -216,7 +227,7 @@ HOSTDEVICE inline complex64 operator+(const complex64& a, const complex64& b) {
}

HOSTDEVICE inline complex64 operator-(const complex64& a, const complex64& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex64(thrust::complex<float>(a.real, a.imag) -
thrust::complex<float>(b.real, b.imag));
#else
Expand All @@ -225,7 +236,7 @@ HOSTDEVICE inline complex64 operator-(const complex64& a, const complex64& b) {
}

HOSTDEVICE inline complex64 operator*(const complex64& a, const complex64& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex64(thrust::complex<float>(a.real, a.imag) *
thrust::complex<float>(b.real, b.imag));
#else
Expand All @@ -235,7 +246,7 @@ HOSTDEVICE inline complex64 operator*(const complex64& a, const complex64& b) {
}

HOSTDEVICE inline complex64 operator/(const complex64& a, const complex64& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex64(thrust::complex<float>(a.real, a.imag) /
thrust::complex<float>(b.real, b.imag));
#else
Expand All @@ -246,7 +257,7 @@ HOSTDEVICE inline complex64 operator/(const complex64& a, const complex64& b) {
}

HOSTDEVICE inline complex64 operator-(const complex64& a) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex64(-thrust::complex<float>(a.real, a.imag));
#else
complex64 res;
Expand All @@ -258,7 +269,7 @@ HOSTDEVICE inline complex64 operator-(const complex64& a) {

HOSTDEVICE inline complex64& operator+=(complex64& a, // NOLINT
const complex64& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
a = complex64(thrust::complex<float>(a.real, a.imag) +=
thrust::complex<float>(b.real, b.imag));
return a;
Expand All @@ -271,7 +282,7 @@ HOSTDEVICE inline complex64& operator+=(complex64& a, // NOLINT

HOSTDEVICE inline complex64& operator-=(complex64& a, // NOLINT
const complex64& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
a = complex64(thrust::complex<float>(a.real, a.imag) -=
thrust::complex<float>(b.real, b.imag));
return a;
Expand All @@ -284,7 +295,7 @@ HOSTDEVICE inline complex64& operator-=(complex64& a, // NOLINT

HOSTDEVICE inline complex64& operator*=(complex64& a, // NOLINT
const complex64& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
a = complex64(thrust::complex<float>(a.real, a.imag) *=
thrust::complex<float>(b.real, b.imag));
return a;
Expand All @@ -297,7 +308,7 @@ HOSTDEVICE inline complex64& operator*=(complex64& a, // NOLINT

HOSTDEVICE inline complex64& operator/=(complex64& a, // NOLINT
const complex64& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
a = complex64(thrust::complex<float>(a.real, a.imag) /=
thrust::complex<float>(b.real, b.imag));
return a;
Expand Down Expand Up @@ -341,6 +352,7 @@ HOSTDEVICE inline bool operator>=(const complex64& a, const complex64& b) {

HOSTDEVICE inline bool(isnan)(const complex64& a) {
#if defined(__CUDA_ARCH__)
// __isnanf not supported on HIP platform
return __isnanf(a.real) || __isnanf(a.imag);
#else
return std::isnan(a.real) || std::isnan(a.imag);
Expand All @@ -349,6 +361,7 @@ HOSTDEVICE inline bool(isnan)(const complex64& a) {

HOSTDEVICE inline bool(isinf)(const complex64& a) {
#if defined(__CUDA_ARCH__)
// __isinff not supported on HIP platform
return __isinff(a.real) || __isinff(a.imag);
#else
return std::isinf(a.real) || std::isinf(a.imag);
Expand All @@ -360,15 +373,15 @@ HOSTDEVICE inline bool(isfinite)(const complex64& a) {
}

HOSTDEVICE inline float(abs)(const complex64& a) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex64(thrust::abs(thrust::complex<float>(a.real, a.imag)));
#else
return std::abs(std::complex<float>(a.real, a.imag));
#endif
}

HOSTDEVICE inline complex64(pow)(const complex64& a, const complex64& b) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex64(thrust::pow(thrust::complex<float>(a.real, a.imag),
thrust::complex<float>(b.real, b.imag)));
#else
Expand All @@ -377,23 +390,23 @@ HOSTDEVICE inline complex64(pow)(const complex64& a, const complex64& b) {
}

HOSTDEVICE inline complex64(sqrt)(const complex64& a) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex64(thrust::sqrt(thrust::complex<float>(a.real, a.imag)));
#else
return std::sqrt(std::complex<float>(a));
#endif
}

HOSTDEVICE inline complex64(tanh)(const complex64& a) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex64(thrust::tanh(thrust::complex<float>(a.real, a.imag)));
#else
return std::tanh(std::complex<float>(a));
#endif
}

HOSTDEVICE inline complex64(log)(const complex64& a) {
#if defined(__CUDA_ARCH__)
#if defined(__CUDA_ARCH__) || defined(__HIPCC__)
return complex64(thrust::log(thrust::complex<float>(a.real, a.imag)));
#else
return std::log(std::complex<float>(a));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ limitations under the License. */

#pragma once

#ifndef __NVCC__
#if !defined(__NVCC__) && !defined(__HIPCC__)
#error device_ptr_cast must be include by .cu file
#endif

Expand Down
Loading

0 comments on commit f89da4a

Please sign in to comment.