Skip to content

Commit

Permalink
[MTAI-484] fix(build): replace "MUSAAA" with "MUSA"
Browse files Browse the repository at this point in the history
  • Loading branch information
caizhi-mt committed Aug 4, 2023
1 parent ffec2fc commit 4445bc0
Show file tree
Hide file tree
Showing 21 changed files with 46 additions and 26 deletions.
3 changes: 3 additions & 0 deletions paddle/phi/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,16 @@ file(
"gpu/c*.cu"
"gpu/s*.cu"
"gpu/abs_kernel.cu"
"gpu/uniform_kernel.cu"
"gpu/activation_kernel.cu"
"gpu/full_kernel.cu"
"gpu/p_norm_grad_kernel.cu"
"gpu/matmul_kernel.cu"
"gpu/expand_kernel.cu"
"gpu/isfinite_kernel.cu"
"kps/*.cu"
"legacy/gpu/uniform_kernel.cu"
"sparse/gpu/mask_kernel.cu"
"legacy/kps/*.cu"
)
list(REMOVE_ITEM kernel_cu
Expand Down
22 changes: 13 additions & 9 deletions paddle/phi/kernels/funcs/distribution_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,15 +192,15 @@ struct normal_distribution<double> {
#elif defined(__MUSACC__)
template <typename T>
struct uniform_distribution {
__device__ inline T operator()(murandStatePhilox4_32_10_t *state) const {
__device__ inline T operator()(murand_state_philox4x32_10 *state) const {
return static_cast<T>(murand_uniform(state));
}
static constexpr int kReturnsCount = 1;
};

template <>
struct uniform_distribution<float> {
__device__ inline float4 operator()(murandStatePhilox4_32_10_t *state) const {
__device__ inline float4 operator()(murand_state_philox4x32_10 *state) const {
return murand_uniform4(state);
}
static constexpr int kReturnsCount = 4;
Expand All @@ -209,15 +209,15 @@ struct uniform_distribution<float> {
template <>
struct uniform_distribution<double> {
__device__ inline double2 operator()(
murandStatePhilox4_32_10_t *state) const {
return murand_uniform2_double(state);
murand_state_philox4x32_10 *state) const {
return murand_uniform_double2(state);
}
static constexpr int kReturnsCount = 2;
};

template <>
struct uniform_distribution<uint32_t> {
__device__ inline uint4 operator()(murandStatePhilox4_32_10_t *state) const {
__device__ inline uint4 operator()(murand_state_philox4x32_10 *state) const {
return murand4(state);
}
static constexpr int kReturnsCount = 4;
Expand All @@ -226,7 +226,7 @@ struct uniform_distribution<uint32_t> {
template <>
struct uniform_distribution<uint64_t> {
__device__ inline ulonglong2 operator()(
murandStatePhilox4_32_10_t *state) const {
murand_state_philox4x32_10 *state) const {
ulonglong2 result;
uint4 rand = murand4(state);
result.x = (uint64_t)rand.x << 32 | rand.y;
Expand All @@ -238,7 +238,7 @@ struct uniform_distribution<uint64_t> {

template <>
struct normal_distribution<float> {
__device__ inline float4 operator()(murandStatePhilox4_32_10_t *state) const {
__device__ inline float4 operator()(murand_state_philox4x32_10 *state) const {
return murand_normal4(state);
}
static constexpr int kReturnsCount = 4;
Expand All @@ -247,8 +247,8 @@ struct normal_distribution<float> {
template <>
struct normal_distribution<double> {
__device__ inline double2 operator()(
murandStatePhilox4_32_10_t *state) const {
return murand_normal2_double(state);
murand_state_philox4x32_10 *state) const {
return murand_normal_double2(state);
}
static constexpr int kReturnsCount = 2;
};
Expand Down Expand Up @@ -334,6 +334,10 @@ __global__ void DistributionKernel(size_t size,
curandStatePhilox4_32_10_t state;
curand_init(seed, idx + THREAD_ID_X, offset, &state);
using SType = curandStatePhilox4_32_10_t;
#elif defined(__MUSACC__)
murand_state_philox4x32_10 state;
murand_init(seed, idx + THREAD_ID_X, offset, &state);
using SType = murand_state_philox4x32_10;
#else
hiprandStatePhilox4_32_10_t state;
hiprand_init(seed, idx + THREAD_ID_X, offset, &state);
Expand Down
4 changes: 4 additions & 0 deletions paddle/phi/kernels/funcs/sparse/softmax.cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ inline DenseTensor GetOffsets(const Context& dev_ctx,
const IntT dim) {
#ifdef __HIPCC__
const auto& policy = thrust::hip::par.on(dev_ctx.stream());
#elif defined(__MUSACC__)
const auto& policy = thrust::musa::par.on(dev_ctx.stream());
#else
const auto& policy = thrust::cuda::par.on(dev_ctx.stream());
#endif
Expand Down Expand Up @@ -87,6 +89,8 @@ std::tuple<DenseTensor, DenseTensor, DenseTensor, DenseTensor> ComputePoolMax(
const IntT dim) {
#ifdef __HIPCC__
const auto& policy = thrust::hip::par.on(dev_ctx.stream());
#elif defined(__MUSACC__)
const auto& policy = thrust::musa::par.on(dev_ctx.stream());
#else
const auto& policy = thrust::cuda::par.on(dev_ctx.stream());
#endif
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/reduce_amax_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ void AMaxKernel(const Context& dev_ctx,
PD_REGISTER_KERNEL(
amax, CPU, ALL_LAYOUT, phi::AMaxKernel, float, double, int, int64_t) {}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
PD_REGISTER_KERNEL(
amax, GPU, ALL_LAYOUT, phi::AMaxKernel, float, double, int, int64_t) {}
#endif
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/reduce_amin_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ void AMinKernel(const Context& dev_ctx,
PD_REGISTER_KERNEL(
amin, CPU, ALL_LAYOUT, phi::AMinKernel, float, double, int, int64_t) {}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
PD_REGISTER_KERNEL(
amin, GPU, ALL_LAYOUT, phi::AMinKernel, float, double, int, int64_t) {}
#endif
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/reduce_mean_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ PD_REGISTER_KERNEL(mean,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
PD_REGISTER_KERNEL(mean,
GPU,
ALL_LAYOUT,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/reduce_min_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ PD_REGISTER_KERNEL(
min, GPU, ALL_LAYOUT, phi::MinKernel, float, double, int, int64_t) {}
#endif

#if defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_MUSA)
PD_REGISTER_KERNEL(
min, GPU, ALL_LAYOUT, phi::MinKernel, float, double, int, int64_t, phi::dtype::float16) {}
#endif
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/selected_rows/activation_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ PD_REGISTER_KERNEL(
PD_REGISTER_KERNEL(
sqrt_sr, CPU, ALL_LAYOUT, phi::sr::SqrtKernel, float, double) {}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)

PD_REGISTER_KERNEL(square_sr,
GPU,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/selected_rows/assign_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ PD_REGISTER_KERNEL_FOR_ALL_DTYPE(assign_sr,
kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
PD_REGISTER_KERNEL_FOR_ALL_DTYPE(assign_sr,
GPU,
ALL_LAYOUT,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/selected_rows/full_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ PD_REGISTER_KERNEL(full_sr,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
PD_REGISTER_KERNEL(full_sr,
GPU,
ALL_LAYOUT,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/selected_rows/isfinite_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ PD_REGISTER_KERNEL(isfinite_sr,
int,
int64_t) {}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
PD_REGISTER_KERNEL(isinf_sr,
GPU,
ALL_LAYOUT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ PD_REGISTER_KERNEL(merge_selected_rows,
float,
double) {}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
PD_REGISTER_KERNEL(merge_selected_rows,
GPU,
ALL_LAYOUT,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/selected_rows/scale_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ PD_REGISTER_KERNEL(scale_sr,
int,
int64_t) {}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
PD_REGISTER_KERNEL(scale_sr,
GPU,
ALL_LAYOUT,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/selected_rows/shape_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ PD_REGISTER_KERNEL(shape_sr,
kernel->OutputAt(0).SetDataType(phi::DataType::INT32);
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
PD_REGISTER_KERNEL(shape_sr,
GPU,
ALL_LAYOUT,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/selected_rows/uniform_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ PD_REGISTER_KERNEL(uniform_sr,
double,
phi::dtype::bfloat16) {}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)

PD_REGISTER_KERNEL(uniform_raw_sr,
GPU,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/sparse/gpu/conv.cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ inline void CallThrustScan(const GPUContext& dev_ctx,
int* h_offsets_ptr) {
#ifdef PADDLE_WITH_HIP
thrust::exclusive_scan(thrust::hip::par.on(dev_ctx.stream()),
#elif definfed(PADDLE_WITH_MUSA)
#elif defined(PADDLE_WITH_MUSA)
thrust::exclusive_scan(thrust::musa::par.on(dev_ctx.stream()),
#else
thrust::exclusive_scan(thrust::cuda::par.on(dev_ctx.stream()),
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/sparse/gpu/slice_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ void SliceCsrTensor2D(const Context& dev_ctx,
out_crows_data);
#ifdef PADDLE_WITH_HIP
thrust::inclusive_scan(thrust::hip::par.on(dev_ctx.stream()),
#ifdef PADDLE_WITH_MUSA
#elif defined(PADDLE_WITH_MUSA)
thrust::inclusive_scan(thrust::musa::par.on(dev_ctx.stream()),
#else
thrust::inclusive_scan(thrust::cuda::par.on(dev_ctx.stream()),
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ void SoftmaxCooGradGPUKernel(const Context& dev_ctx,
bool is_same_offset = thrust::equal(thrust::hip::par.on(dev_ctx.stream()),
#elif defined(PADDLE_WITH_MUSA)
const auto& policy = thrust::musa::par.on(dev_ctx.stream());
bool is_same_offset = thrust::equal(thrust::hip::par.on(dev_ctx.stream()),
bool is_same_offset = thrust::equal(thrust::musa::par.on(dev_ctx.stream()),
#else
const auto& policy = thrust::cuda::par.on(dev_ctx.stream());
bool is_same_offset = thrust::equal(thrust::cuda::par.on(dev_ctx.stream()),
Expand Down
9 changes: 9 additions & 0 deletions paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,15 @@ void CsrToCooGPUKernel(const GPUContext& dev_ctx,
PADDLE_THROW(
phi::errors::Unimplemented("'rocsparse_csr2coo' only supports batches "
"with a value of 1 currently."));
#elif defined(PADDLE_WITH_MUSA)
auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, batches, 1);
GetBatchSizes<IntT><<<config.block_per_grid.x, config.thread_per_block.x>>>(
csr_crows_data, rows, batches, offsets_ptr);

thrust::exclusive_scan(thrust::musa::par.on(dev_ctx.stream()),
offsets_ptr,
offsets_ptr + batches,
offsets_ptr);
#else
auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, batches, 1);
GetBatchSizes<IntT><<<config.block_per_grid.x, config.thread_per_block.x>>>(
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ PD_REGISTER_KERNEL(sparse_coo_tensor_grad,
kernel->InputAt(1).SetDataLayout(phi::DataLayout::SPARSE_COO);
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
PD_REGISTER_KERNEL(values_coo_grad,
GPU,
ALL_LAYOUT,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/transfer_layout_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ PD_REGISTER_KERNEL_FOR_ALL_DTYPE(transfer_layout,
CPU,
ALL_LAYOUT,
phi::TransferLayoutKernel<phi::CPUContext>) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSAAA)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_MUSA)
PD_REGISTER_KERNEL_FOR_ALL_DTYPE(transfer_layout,
GPU,
ALL_LAYOUT,
Expand Down

0 comments on commit 4445bc0

Please sign in to comment.