From 4fad3e73fa154f7a160a5c6e3668189724e086d2 Mon Sep 17 00:00:00 2001 From: zhouwei25 Date: Wed, 15 Jun 2022 15:11:27 +0000 Subject: [PATCH 1/2] add new API/OP(csr->csr) of SparseTensor softmax --- .../kernels/sparse/cpu/softmax_grad_kernel.cc | 91 ++++++++++++++ .../phi/kernels/sparse/cpu/softmax_kernel.cc | 92 ++++++++++++++ paddle/phi/kernels/sparse/empty_kernel.cc | 78 ++++++++++++ paddle/phi/kernels/sparse/empty_kernel.h | 28 +++++ .../kernels/sparse/gpu/softmax_grad_kernel.cu | 102 +++++++++++++++ .../phi/kernels/sparse/gpu/softmax_kernel.cu | 116 ++++++++++++++++++ .../phi/kernels/sparse/softmax_grad_kernel.h | 30 +++++ paddle/phi/kernels/sparse/softmax_kernel.h | 29 +++++ .../tests/unittests/test_sparse_softmax.py | 77 ++++++++++++ .../tests/unittests/test_sparse_unary_op.py | 2 +- python/paddle/incubate/sparse/binary.py | 2 +- python/paddle/incubate/sparse/nn/__init__.py | 2 + .../incubate/sparse/nn/functional/__init__.py | 2 + .../sparse/nn/functional/activation.py | 67 ++++++++-- .../incubate/sparse/nn/layer/activation.py | 69 +++++++++++ python/paddle/incubate/sparse/unary.py | 38 ++---- python/paddle/utils/code_gen/sparse_api.yaml | 8 ++ .../paddle/utils/code_gen/sparse_bw_api.yaml | 7 ++ 18 files changed, 802 insertions(+), 38 deletions(-) create mode 100644 paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc create mode 100644 paddle/phi/kernels/sparse/cpu/softmax_kernel.cc create mode 100644 paddle/phi/kernels/sparse/empty_kernel.cc create mode 100644 paddle/phi/kernels/sparse/empty_kernel.h create mode 100644 paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu create mode 100644 paddle/phi/kernels/sparse/gpu/softmax_kernel.cu create mode 100644 paddle/phi/kernels/sparse/softmax_grad_kernel.h create mode 100644 paddle/phi/kernels/sparse/softmax_kernel.h create mode 100644 python/paddle/fluid/tests/unittests/test_sparse_softmax.py diff --git a/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc new file mode 100644 index 0000000000000..a97718436a3f6 --- /dev/null +++ b/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc @@ -0,0 +1,91 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/kernels/sparse/softmax_grad_kernel.h" + +#include "paddle/fluid/platform/cpu_info.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/visit_type.h" +#include "paddle/phi/kernels/funcs/cpu_vec.h" +#include "paddle/phi/kernels/sparse/empty_kernel.h" + +namespace plt = paddle::platform; + +namespace phi { +namespace sparse { + +template +void SoftmaxCsrGradKernel(const Context& dev_ctx, + const SparseCsrTensor& out, + const SparseCsrTensor& dout, + int axis, + SparseCsrTensor* dx) { + PADDLE_ENFORCE_EQ(axis, + -1, + phi::errors::Unimplemented( + "SparseCsrTensor only support axis=-1 for softmax, " + "which is faster when reading data by row (axis=-1)")); + EmptyLikeCsrKernel(dev_ctx, dout, dx); + + auto out_dim = out.dims(); + int rows = 1; + for (int i = 0; i < out_dim.size() - 1; ++i) { + rows *= out_dim[i]; + } + + const DenseTensor& out_crows = out.non_zero_crows(); + const DenseTensor& out_values = out.non_zero_elements(); + const DenseTensor& dout_values = dout.non_zero_elements(); + DenseTensor* dx_values = dx->mutable_non_zero_elements(); + + int row_first = 0; + int row_nnz = 0; + const T* out_data = out_values.data(); + const T* dout_data = dout_values.data(); + T* dx_data = dx_values->data(); + + // dx = (dout - sum(dout * out)) * out + PD_VISIT_INTEGRAL_TYPES( + out.non_zero_crows().dtype(), "SoftmaxCsrGradKernel", ([&] { + const data_t* out_crows_data = out_crows.data(); + for (int i = 0; i < rows; ++i) { + row_first = static_cast(out_crows_data[i]); + row_nnz = static_cast(out_crows_data[i + 1] - out_crows_data[i]); + + out_data = out_data + row_first; + dout_data = dout_data + row_first; + dx_data = dx_data + row_first; + + T sum = 0; + phi::funcs::vec_mul_reduce( + row_nnz, dout_data, out_data, &sum); + phi::funcs::vec_add_bias( + row_nnz, static_cast(-1) * sum, dout_data, dx_data); + phi::funcs::vec_mul(row_nnz, dx_data, out_data, dx_data); + } + })); +} + +} // namespace sparse +} // namespace phi + +PD_REGISTER_KERNEL(softmax_csr_grad, + CPU, + ALL_LAYOUT, + phi::sparse::SoftmaxCsrGradKernel, + float, + double) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); +} diff --git a/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc b/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc new file mode 100644 index 0000000000000..46ff09004cdcc --- /dev/null +++ b/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc @@ -0,0 +1,92 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/kernels/sparse/softmax_kernel.h" + +#include "paddle/fluid/platform/cpu_info.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/visit_type.h" +#include "paddle/phi/kernels/funcs/cpu_vec.h" +#include "paddle/phi/kernels/sparse/empty_kernel.h" + +namespace plt = paddle::platform; + +namespace phi { +namespace sparse { + +template +void SoftmaxCsrKernel(const Context& dev_ctx, + const SparseCsrTensor& x, + int axis, + SparseCsrTensor* out) { + PADDLE_ENFORCE_EQ(axis, + -1, + phi::errors::Unimplemented( + "SparseCsrTensor only support axis=-1 for softmax, " + "which is faster when reading data by row (axis=-1)")); + EmptyLikeCsrKernel(dev_ctx, x, out); + + auto x_dim = x.dims(); + int row_number = 1; + for (int i = 0; i < x_dim.size() - 1; ++i) { + row_number *= x_dim[i]; + } + + const DenseTensor& x_crows = x.non_zero_crows(); + const DenseTensor& x_values = x.non_zero_elements(); + DenseTensor* out_values = out->mutable_non_zero_elements(); + + int row_first = 0; + int row_nnz = 0; + T row_max_val = 0; + const T* x_data = x_values.data(); + T* out_data = out_values->data(); + + // out = exp(x-x_max) / sum( exp(x-x_max )) + PD_VISIT_INTEGRAL_TYPES( + x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] { + const data_t* x_crows_data = x_crows.data(); + for (int i = 0; i < row_number; ++i) { + row_first = static_cast(x_crows_data[i]); + row_nnz = static_cast(x_crows_data[i + 1] - x_crows_data[i]); + + x_data = x_data + row_first; + out_data = out_data + row_first; + + row_max_val = *std::max_element(x_data, x_data + row_nnz); + phi::funcs::vec_add_bias( + row_nnz, static_cast(-1) * row_max_val, x_data, out_data); + + phi::funcs::vec_exp(row_nnz, out_data, out_data); + + T sum = 0; + phi::funcs::vec_sum(row_nnz, out_data, &sum); + phi::funcs::vec_scal( + row_nnz, static_cast(1) / sum, out_data, out_data); + } + })); +} + +} // namespace sparse +} // namespace phi + +PD_REGISTER_KERNEL(softmax_csr, + CPU, + ALL_LAYOUT, + phi::sparse::SoftmaxCsrKernel, + float, + double) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); +} diff --git a/paddle/phi/kernels/sparse/empty_kernel.cc b/paddle/phi/kernels/sparse/empty_kernel.cc new file mode 100644 index 0000000000000..b8e164de534cf --- /dev/null +++ b/paddle/phi/kernels/sparse/empty_kernel.cc @@ -0,0 +1,78 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/kernels/sparse/empty_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/copy_kernel.h" + +namespace phi { +namespace sparse { + +template +void EmptyLikeCsrKernel(const Context& dev_ctx, + const SparseCsrTensor& x, + SparseCsrTensor* out) { + const DenseTensor& x_crows = x.non_zero_crows(); + const DenseTensor& x_cols = x.non_zero_cols(); + const DenseTensor& x_values = x.non_zero_elements(); + + DenseTensor* out_crows = out->mutable_non_zero_crows(); + DenseTensor* out_cols = out->mutable_non_zero_cols(); + DenseTensor* out_values = out->mutable_non_zero_elements(); + + out->set_dims(x.dims()); + phi::Copy(dev_ctx, x_crows, dev_ctx.GetPlace(), false, out_crows); + phi::Copy(dev_ctx, x_cols, dev_ctx.GetPlace(), false, out_cols); + + out_values->Resize(x_values.dims()); + dev_ctx.template Alloc(out_values); +} + +} // namespace sparse +} // namespace phi + +PD_REGISTER_KERNEL(empty_like_csr, + CPU, + ALL_LAYOUT, + phi::sparse::EmptyLikeCsrKernel, + float, + double, + int8_t, + uint8_t, + int16_t, + int, + int64_t, + bool) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); +} + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +PD_REGISTER_KERNEL(empty_like_csr, + GPU, + ALL_LAYOUT, + phi::sparse::EmptyLikeCsrKernel, + float, + double, + int8_t, + uint8_t, + int16_t, + int, + int64_t, + bool) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); +} +#endif diff --git a/paddle/phi/kernels/sparse/empty_kernel.h b/paddle/phi/kernels/sparse/empty_kernel.h new file mode 100644 index 0000000000000..9656f3e7b3b9f --- /dev/null +++ b/paddle/phi/kernels/sparse/empty_kernel.h @@ -0,0 +1,28 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/core/sparse_csr_tensor.h" + +namespace phi { +namespace sparse { + +template +void EmptyLikeCsrKernel(const Context& dev_ctx, + const SparseCsrTensor& x, + SparseCsrTensor* out); + +} // namespace sparse +} // namespace phi diff --git a/paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu new file mode 100644 index 0000000000000..0eb0acc3fc9a3 --- /dev/null +++ b/paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu @@ -0,0 +1,102 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/visit_type.h" +#include "paddle/phi/kernels/funcs/math_cuda_utils.h" +#include "paddle/phi/kernels/sparse/empty_kernel.h" +#include "paddle/phi/kernels/sparse/softmax_grad_kernel.h" + +namespace phi { +namespace sparse { + +template +__global__ void SoftmaxGradGpuKernel(const IntT* out_crows, + const T* out_values, + const T* dout_values, + T* dx_values, + int row_number) { + // dx = (dout - sum(dout * out)) * out + int row = blockIdx.x * blockDim.y + threadIdx.y; + int non_zero_idx = threadIdx.x; + if (row >= row_number) return; + int row_first = static_cast(out_crows[row]); + int row_nnz = static_cast(out_crows[row + 1] - out_crows[row]); + if (row_nnz == 0) return; + + int kIteration = (row_nnz + warpSize - 1) / warpSize; + + T mul_result = 0; + for (int i = 0; i < kIteration; ++i) { + int idx = non_zero_idx + i * warpSize; + if (idx >= row_nnz) break; + + mul_result += out_values[row_first + idx] * dout_values[row_first + idx]; + } + T sum = phi::funcs::warpReduceSum(mul_result, 0xFFFFFFFF); + + for (int i = 0; i < kIteration; ++i) { + int idx = non_zero_idx + i * warpSize; + if (idx >= row_nnz) break; + + dx_values[row_first + idx] = + (dout_values[row_first + idx] - sum) * out_values[row_first + idx]; + } +} + +template +void SoftmaxCsrGradKernel(const Context& dev_ctx, + const SparseCsrTensor& out, + const SparseCsrTensor& dout, + int axis, + SparseCsrTensor* dx) { + PADDLE_ENFORCE_EQ(axis, + -1, + phi::errors::Unimplemented( + "SparseCsrTensor only support axis=-1 for softmax, " + "which is faster when reading data by row (axis=-1)")); + EmptyLikeCsrKernel(dev_ctx, dout, dx); + + auto out_dim = out.dims(); + int row_number = 1; + for (int i = 0; i < out_dim.size() - 1; ++i) { + row_number *= out_dim[i]; + } + + dim3 grid((row_number + 3) / 4); + dim3 block(32, 4); + + PD_VISIT_INTEGRAL_TYPES( + out.non_zero_crows().dtype(), "SoftmaxCsrGradKernel", ([&] { + SoftmaxGradGpuKernel<<>>( + out.non_zero_crows().data(), + out.non_zero_elements().data(), + dout.non_zero_elements().data(), + dx->mutable_non_zero_elements()->data(), + row_number); + })); +} + +} // namespace sparse +} // namespace phi + +PD_REGISTER_KERNEL(softmax_csr_grad, + GPU, + ALL_LAYOUT, + phi::sparse::SoftmaxCsrGradKernel, + float, + double) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); +} diff --git a/paddle/phi/kernels/sparse/gpu/softmax_kernel.cu b/paddle/phi/kernels/sparse/gpu/softmax_kernel.cu new file mode 100644 index 0000000000000..a3bd396dd9c72 --- /dev/null +++ b/paddle/phi/kernels/sparse/gpu/softmax_kernel.cu @@ -0,0 +1,116 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/visit_type.h" +#include "paddle/phi/kernels/empty_kernel.h" +#include "paddle/phi/kernels/funcs/activation_functor.h" +#include "paddle/phi/kernels/funcs/math_cuda_utils.h" +#include "paddle/phi/kernels/sparse/empty_kernel.h" +#include "paddle/phi/kernels/sparse/softmax_kernel.h" + +namespace phi { +namespace sparse { + +template +__global__ void SoftmaxGpuKernel(const IntT* x_crows, + const T* x_values, + T* out_values, + int row_number) { + // out = exp(x-x_max) / sum(exp(x-x_max)) + int row = blockIdx.x * blockDim.y + threadIdx.y; + int non_zero_idx = threadIdx.x; + if (row >= row_number) return; + int row_first = static_cast(x_crows[row]); + int row_nnz = static_cast(x_crows[row + 1] - x_crows[row]); + if (row_nnz == 0) return; + + int kIteration = (row_nnz + warpSize - 1) / warpSize; + + T max_val = -std::numeric_limits::infinity(); + for (int i = 0; i < kIteration; ++i) { + int idx = non_zero_idx + i * warpSize; + if (idx >= row_nnz) break; + + if (max_val < x_values[row_first + idx]) { + max_val = x_values[row_first + idx]; + } + } + T row_max_val = phi::funcs::warpReduceMax(max_val, 0xFFFFFFFF); + + T exp_sum = 0; + for (int i = 0; i < kIteration; ++i) { + int idx = non_zero_idx + i * warpSize; + if (idx >= row_nnz) break; + + auto functor = phi::funcs::CudaExpFunctor(); + out_values[row_first + idx] = + functor(x_values[row_first + idx] - row_max_val); + exp_sum += functor(x_values[row_first + idx] - row_max_val); + } + T row_exp_sum = phi::funcs::warpReduceSum(exp_sum, 0xFFFFFFFF); + + for (int i = 0; i < kIteration; ++i) { + int idx = non_zero_idx + i * warpSize; + if (idx >= row_nnz) break; + + out_values[row_first + idx] = out_values[row_first + idx] / row_exp_sum; + } +} + +template +void SoftmaxCsrKernel(const Context& dev_ctx, + const SparseCsrTensor& x, + int axis, + SparseCsrTensor* out) { + PADDLE_ENFORCE_EQ(axis, + -1, + phi::errors::Unimplemented( + "SparseCsrTensor only support axis=-1 for softmax, " + "which is faster when reading data by row (axis=-1)")); + EmptyLikeCsrKernel(dev_ctx, x, out); + + auto x_dim = x.dims(); + int row_number = 1; + for (int i = 0; i < x_dim.size() - 1; ++i) { + row_number *= x_dim[i]; + } + dim3 grid((row_number + 3) / 4); + dim3 block(32, 4); + + DenseTensor tmp_tensor = + phi::EmptyLike(dev_ctx, x.non_zero_elements()); + + PD_VISIT_INTEGRAL_TYPES(x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] { + SoftmaxGpuKernel + <<>>( + x.non_zero_crows().data(), + x.non_zero_elements().data(), + out->mutable_non_zero_elements()->data(), + row_number); + })); +} + +} // namespace sparse +} // namespace phi + +PD_REGISTER_KERNEL(softmax_csr, + GPU, + ALL_LAYOUT, + phi::sparse::SoftmaxCsrKernel, + float, + double) { + kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_CSR); +} diff --git a/paddle/phi/kernels/sparse/softmax_grad_kernel.h b/paddle/phi/kernels/sparse/softmax_grad_kernel.h new file mode 100644 index 0000000000000..abc5c26e8a531 --- /dev/null +++ b/paddle/phi/kernels/sparse/softmax_grad_kernel.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/core/sparse_csr_tensor.h" + +namespace phi { +namespace sparse { + +template +void SoftmaxCsrGradKernel(const Context& dev_ctx, + const SparseCsrTensor& out, + const SparseCsrTensor& dout, + int axis, + SparseCsrTensor* dx); + +} // namespace sparse +} // namespace phi diff --git a/paddle/phi/kernels/sparse/softmax_kernel.h b/paddle/phi/kernels/sparse/softmax_kernel.h new file mode 100644 index 0000000000000..13f25dc7d23fc --- /dev/null +++ b/paddle/phi/kernels/sparse/softmax_kernel.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/core/sparse_csr_tensor.h" + +namespace phi { +namespace sparse { + +template +void SoftmaxCsrKernel(const Context& dev_ctx, + const SparseCsrTensor& X, + int axis, + SparseCsrTensor* out); + +} // namespace sparse +} // namespace phi diff --git a/python/paddle/fluid/tests/unittests/test_sparse_softmax.py b/python/paddle/fluid/tests/unittests/test_sparse_softmax.py new file mode 100644 index 0000000000000..5f33a9e474ae5 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_sparse_softmax.py @@ -0,0 +1,77 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle +from paddle.fluid.framework import _test_eager_guard + +import numpy as np +import scipy +import scipy.sparse as sp +import unittest +import os +import re +import math + +np.random.seed(2022) + + +class TestCsrSoftmax(unittest.TestCase): + + def test_softmax(self): + with _test_eager_guard(): + mask = np.random.rand(1, 5) < 0.5 + np_x = np.random.rand(1, 5) * mask + np_csr = sp.csr_matrix(np_x) + + row_number = np_csr.shape[0] + np_out = np.array([]) + for i in range(row_number): + start = np_csr.indptr[i] + end = np_csr.indptr[i + 1] + if start == end: + continue + x = np_csr.data[start:end] + x_max = np.max(x, keepdims=True) + x_exp = np.exp(x - x_max) + x_exp_sum = np.sum(x_exp, keepdims=True) + np_out = np.concatenate([np_out, x_exp / x_exp_sum]) + + csr = paddle.to_tensor(np_x, stop_gradient=False).to_sparse_csr() + m = paddle.incubate.sparse.nn.Softmax() + out = m(csr) + self.assertTrue(np.allclose(out.crows().numpy(), np_csr.indptr)) + self.assertTrue(np.allclose(out.cols().numpy(), np_csr.indices)) + self.assertTrue(np.allclose(out.values().numpy(), np_out)) + + # dx = (dout - sum(dout * out)) * out, dout=rand_x + out.backward(csr.detach()) + for i in range(row_number): + start = np_csr.indptr[i] + end = np_csr.indptr[i + 1] + if start == end: + continue + out = np_out[start:end] + dout = np_csr.data[start:end] + sum = np.sum(dout * out, keepdims=True) + dx = (dout - sum) * out + + self.assertTrue(np.allclose(csr.grad.crows().numpy(), + np_csr.indptr)) + self.assertTrue(np.allclose(csr.grad.cols().numpy(), + np_csr.indices)) + self.assertTrue(np.allclose(csr.grad.values().numpy(), dx)) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py b/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py index 3fd6665b26d8c..2272022e8d6dc 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_unary_op.py @@ -27,7 +27,7 @@ class TestSparseUnary(unittest.TestCase): def assert_raises_on_dense_tensor(self, sparse_func): with _test_eager_guard(): dense_x = paddle.ones((2, 3)) - with self.assertRaises(ValueError): + with self.assertRaises(NotImplementedError): sparse_func(dense_x) def compare_with_dense( diff --git a/python/paddle/incubate/sparse/binary.py b/python/paddle/incubate/sparse/binary.py index f03cd985201fd..a158e7d6845c9 100644 --- a/python/paddle/incubate/sparse/binary.py +++ b/python/paddle/incubate/sparse/binary.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.common_ops_import import dygraph_only from paddle import _C_ops +from paddle.fluid.framework import dygraph_only __all__ = [] diff --git a/python/paddle/incubate/sparse/nn/__init__.py b/python/paddle/incubate/sparse/nn/__init__.py index be4985e694b4b..0df4d02414654 100644 --- a/python/paddle/incubate/sparse/nn/__init__.py +++ b/python/paddle/incubate/sparse/nn/__init__.py @@ -15,6 +15,7 @@ from . 
import functional from .layer.activation import ReLU +from .layer.activation import Softmax from .layer.norm import BatchNorm from .layer.conv import Conv3D from .layer.conv import SubmConv3D @@ -22,6 +23,7 @@ __all__ = [ 'ReLU', + 'Softmax', 'BatchNorm', 'Conv3D', 'SubmConv3D', diff --git a/python/paddle/incubate/sparse/nn/functional/__init__.py b/python/paddle/incubate/sparse/nn/functional/__init__.py index a16a8a8240a23..af5636a622a9a 100644 --- a/python/paddle/incubate/sparse/nn/functional/__init__.py +++ b/python/paddle/incubate/sparse/nn/functional/__init__.py @@ -16,10 +16,12 @@ from .conv import subm_conv3d # noqa: F401 from .pooling import max_pool3d # noqa: F401 from .activation import relu # noqa: F401 +from .activation import softmax # noqa: F401 __all__ = [ 'conv3d', 'subm_conv3d', 'max_pool3d', 'relu', + 'softmax', ] diff --git a/python/paddle/incubate/sparse/nn/functional/activation.py b/python/paddle/incubate/sparse/nn/functional/activation.py index 3396cc53cc479..3d4e77010b25f 100644 --- a/python/paddle/incubate/sparse/nn/functional/activation.py +++ b/python/paddle/incubate/sparse/nn/functional/activation.py @@ -15,8 +15,10 @@ __all__ = [] from paddle import _C_ops, in_dynamic_mode +from paddle.fluid.framework import dygraph_only +@dygraph_only def relu(x, name=None): """ sparse relu activation, requiring x to be a sparse coo or sparse csr tensor. @@ -44,12 +46,63 @@ def relu(x, name=None): sparse_x = dense_x.to_sparse_coo(1) out = paddle.incubate.sparse.nn.functional.relu(sparse_x) """ + return _C_ops.final_state_sparse_relu(x) - assert in_dynamic_mode(), "Currently, Sparse API only support dynamic mode" - if x.is_sparse_coo() or x.is_sparse_csr(): - return _C_ops.final_state_sparse_relu(x) - else: - raise ValueError( - "Currently, sparse.relu only support the input of SparseCooTensor or SparseCsrTensor" - ) +@dygraph_only +def softmax(x, axis=-1, name=None): + """ + sparse softmax activation, x must be SparseCsrTensor or SparseCooTensor. + + Note: + Only supported axis=-1 for SparseCsrTensor, which is faster when read data + by row (axis=-1). + + From the point of view of dense matrix, for each row :math:`i` and each column :math:`j` + in the matrix, we have: + + .. math:: + + softmax_ij = \frac{\exp(x_ij - max_j(x_ij))}{\sum_j(exp(x_ij - max_j(x_ij))} + + Parameters: + x (Tensor): The input tensor. It can be SparseCooTensor/SparseCsrTensor. The data type can be float32 or float64. + axis (int, optional): The axis along which to perform softmax calculations. Only support -1 for SparseCsrTensor. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor: SparseCoo or SparseCsr, whose layout is the same with `x` . + + Examples: + .. code-block:: python + + import paddle + import numpy as np + from paddle.fluid.framework import _test_eager_guard + + paddle.seed(100) + + with _test_eager_guard(): + mask = np.random.rand(3, 4) < 0.5 + np_x = np.random.rand(3, 4) * mask + # [[0. 0. 0.96823406 0.19722934] + # [0.94373937 0. 0.02060066 0.71456372] + # [0. 0. 0. 
0.98275049]] + + csr = paddle.to_tensor(np_x).to_sparse_csr() + # Tensor(shape=[3, 4], dtype=paddle.float64, place=Place(gpu:0), stop_gradient=True, + # crows=[0, 2, 5, 6], + # cols=[2, 3, 0, 2, 3, 3], + # values=[0.96823406, 0.19722934, 0.94373937, 0.02060066, 0.71456372, + # 0.98275049]) + + out = paddle.incubate.sparse.nn.functional.softmax(csr) + # Tensor(shape=[3, 4], dtype=paddle.float64, place=Place(gpu:0), stop_gradient=True, + # crows=[0, 2, 5, 6], + # cols=[2, 3, 0, 2, 3, 3], + # values=[0.68373820, 0.31626180, 0.45610887, 0.18119845, 0.36269269, + # 1. ]) + + """ + return _C_ops.final_state_sparse_softmax(x, axis) diff --git a/python/paddle/incubate/sparse/nn/layer/activation.py b/python/paddle/incubate/sparse/nn/layer/activation.py index 75285eb11adc2..011fef90c0889 100644 --- a/python/paddle/incubate/sparse/nn/layer/activation.py +++ b/python/paddle/incubate/sparse/nn/layer/activation.py @@ -59,3 +59,72 @@ def forward(self, x): def extra_repr(self): name_str = 'name={}'.format(self._name) if self._name else '' return name_str + + +class Softmax(Layer): + """ + sparse softmax activation, x must be SparseCsrTensor or SparseCooTensor. + + Note: + Only supported axis=-1 for SparseCsrTensor, which is faster when read data + by row (axis=-1). + + From the point of view of dense matrix, for each row :math:`i` and each column :math:`j` + in the matrix, we have: + + .. math:: + + softmax_ij = \frac{\exp(x_ij - max_j(x_ij))}{\sum_j(exp(x_ij - max_j(x_ij))} + + Parameters: + axis (int, optional): The axis along which to perform softmax calculations. Only support -1 for SparseCsrTensor. + name (str, optional): Name for the operation (optional, default is None). + For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - input: SparseCooTensor / SparseCsrTensor with any shape. + - output: Sparse Tensor with the same shape as input. + + Examples: + .. code-block:: python + + import paddle + import numpy as np + from paddle.fluid.framework import _test_eager_guard + + paddle.seed(100) + + with _test_eager_guard(): + mask = np.random.rand(3, 4) < 0.5 + np_x = np.random.rand(3, 4) * mask + # [[0. 0. 0.96823406 0.19722934] + # [0.94373937 0. 0.02060066 0.71456372] + # [0. 0. 0. 0.98275049]] + + csr = paddle.to_tensor(np_x).to_sparse_csr() + # Tensor(shape=[3, 4], dtype=paddle.float64, place=Place(gpu:0), stop_gradient=True, + # crows=[0, 2, 5, 6], + # cols=[2, 3, 0, 2, 3, 3], + # values=[0.96823406, 0.19722934, 0.94373937, 0.02060066, 0.71456372, + # 0.98275049]) + + m = paddle.incubate.sparse.nn.Softmax() + out = m(csr) + # Tensor(shape=[3, 4], dtype=paddle.float64, place=Place(gpu:0), stop_gradient=True, + # crows=[0, 2, 5, 6], + # cols=[2, 3, 0, 2, 3, 3], + # values=[0.68373820, 0.31626180, 0.45610887, 0.18119845, 0.36269269, + # 1. ]) + """ + + def __init__(self, axis=-1, name=None): + super(Softmax, self).__init__() + self._axis = axis + self._name = name + + def forward(self, x): + return F.softmax(x, self._axis, self._name) + + def extra_repr(self): + name_str = 'name={}'.format(self._name) if self._name else '' + return name_str diff --git a/python/paddle/incubate/sparse/unary.py b/python/paddle/incubate/sparse/unary.py index 85e4088de7d78..09e449b0d9c5e 100644 --- a/python/paddle/incubate/sparse/unary.py +++ b/python/paddle/incubate/sparse/unary.py @@ -12,11 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__all__ = [] +from paddle import _C_ops +from paddle.fluid.framework import dygraph_only -from paddle import _C_ops, in_dynamic_mode +__all__ = [] +@dygraph_only def tanh(x, name=None): """ sparse tanh activation, requiring x to be a sparse coo or sparse csr tensor. @@ -44,17 +46,10 @@ def tanh(x, name=None): sparse_x = dense_x.to_sparse_coo(1) out = paddle.incubate.sparse.tanh(sparse_x) """ - - assert in_dynamic_mode(), "Currently, Sparse API only support dynamic mode" - - if x.is_sparse_coo() or x.is_sparse_csr(): - return _C_ops.final_state_sparse_tanh(x) - else: - raise ValueError( - "Currently, sparse.tanh only support the input of SparseCooTensor or SparseCsrTensor" - ) + return _C_ops.final_state_sparse_tanh(x) +@dygraph_only def sqrt(x, name=None): """ Calculate square root of x, requiring x to be a sparse coo or sparse csr tensor. @@ -82,17 +77,10 @@ def sqrt(x, name=None): sparse_x = dense_x.to_sparse_coo(1) out = paddle.incubate.sparse.sqrt(sparse_x) """ - - assert in_dynamic_mode(), "Currently, Sparse API only support dynamic mode" - - if x.is_sparse_coo() or x.is_sparse_csr(): - return _C_ops.final_state_sparse_sqrt(x) - else: - raise ValueError( - "Currently, sparse.sqrt only support the input of SparseCooTensor or SparseCsrTensor" - ) + return _C_ops.final_state_sparse_sqrt(x) +@dygraph_only def sin(x, name=None): """ Calculate sin of x, requiring x to be a sparse coo or sparse csr tensor. @@ -120,12 +108,4 @@ def sin(x, name=None): sparse_x = dense_x.to_sparse_coo(1) out = paddle.incubate.sparse.sin(sparse_x) """ - - assert in_dynamic_mode(), "Currently, Sparse API only support dynamic mode" - - if x.is_sparse_coo() or x.is_sparse_csr(): - return _C_ops.final_state_sparse_sin(x) - else: - raise ValueError( - "Currently, sparse.sin only support the input of SparseCooTensor or SparseCsrTensor" - ) + return _C_ops.final_state_sparse_sin(x) diff --git a/python/paddle/utils/code_gen/sparse_api.yaml b/python/paddle/utils/code_gen/sparse_api.yaml index 84c6d2a16af43..09bb610e5cb1c 100644 --- a/python/paddle/utils/code_gen/sparse_api.yaml +++ b/python/paddle/utils/code_gen/sparse_api.yaml @@ -46,6 +46,14 @@ layout : x backward : sin_grad +- api : softmax + args : (Tensor x, int axis=-1) + output : Tensor(out) + kernel : + func : softmax_csr{sparse_csr -> sparse_csr} + layout : x + backward : softmax_grad + - api : sqrt args : (Tensor x) output : Tensor(out) diff --git a/python/paddle/utils/code_gen/sparse_bw_api.yaml b/python/paddle/utils/code_gen/sparse_bw_api.yaml index 5d9874dff29ec..a4e83411bc846 100644 --- a/python/paddle/utils/code_gen/sparse_bw_api.yaml +++ b/python/paddle/utils/code_gen/sparse_bw_api.yaml @@ -53,6 +53,13 @@ kernel : func : sparse_coo_sin_grad {sparse_coo, sparse_coo -> sparse_coo} +- backward_api : softmax_grad + forward : softmax(Tensor x, int axis=-1) -> Tensor(out) + args : (Tensor out, Tensor out_grad, int axis) + output : Tensor(x_grad) + kernel : + func : softmax_csr_grad{sparse_csr, sparse_csr -> sparse_csr} + - backward_api : sparse_maxpool_grad forward : sparse_maxpool(Tensor x, int[] kernel_sizes, int[] paddings, int[] dilations, int[] strides) -> Tensor(out), Tensor(rulebook) args : (Tensor x, Tensor rulebook, Tensor out, Tensor out_grad, int[] kernel_sizes) From 23504a61848368ec1bf8ecb1aa18d6ddb4017d50 Mon Sep 17 00:00:00 2001 From: zhouwei25 Date: Fri, 17 Jun 2022 11:59:14 +0000 Subject: [PATCH 2/2] fix comment --- paddle/phi/kernels/sparse/gpu/softmax_kernel.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/paddle/phi/kernels/sparse/gpu/softmax_kernel.cu b/paddle/phi/kernels/sparse/gpu/softmax_kernel.cu index a3bd396dd9c72..6353a46eea1ef 100644 --- a/paddle/phi/kernels/sparse/gpu/softmax_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/softmax_kernel.cu @@ -56,9 +56,9 @@ __global__ void SoftmaxGpuKernel(const IntT* x_crows, if (idx >= row_nnz) break; auto functor = phi::funcs::CudaExpFunctor(); - out_values[row_first + idx] = - functor(x_values[row_first + idx] - row_max_val); - exp_sum += functor(x_values[row_first + idx] - row_max_val); + T exp = functor(x_values[row_first + idx] - row_max_val); + exp_sum += exp; + out_values[row_first + idx] = exp; } T row_exp_sum = phi::funcs::warpReduceSum(exp_sum, 0xFFFFFFFF);
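
Note on the backward rule: both the CPU and GPU grad kernels above apply dx = (dout - sum(dout * out)) * out per CSR row. For reference, this is the standard softmax Jacobian-vector product restricted to the stored non-zeros of a row (a sketch, writing s = softmax(x) for one row and L for the loss):

    \frac{\partial s_i}{\partial x_j} = s_i(\delta_{ij} - s_j)
    \quad\Longrightarrow\quad
    \frac{\partial L}{\partial x_j}
      = \sum_i \frac{\partial L}{\partial s_i}\, s_i(\delta_{ij} - s_j)
      = s_j\Big(\frac{\partial L}{\partial s_j} - \sum_i \frac{\partial L}{\partial s_i}\, s_i\Big),

which is exactly dx = (dout - sum(dout * out)) * out, with the sums taken only over the non-zero entries of the row.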
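
End-to-end usage of the API added by this patch, condensed from the unit test and docstrings above (a minimal sketch; the shape and random values are illustrative only):

    import numpy as np
    import paddle
    from paddle.fluid.framework import _test_eager_guard

    with _test_eager_guard():
        # build a random sparse CSR input
        np_x = np.random.rand(3, 4) * (np.random.rand(3, 4) < 0.5)
        csr = paddle.to_tensor(np_x, stop_gradient=False).to_sparse_csr()

        # functional form (axis=-1 is the only axis supported for SparseCsrTensor)
        out = paddle.incubate.sparse.nn.functional.softmax(csr, axis=-1)

        # layer form
        m = paddle.incubate.sparse.nn.Softmax()
        out = m(csr)

        # backward produces a gradient with the same sparsity pattern as the input
        out.backward(csr.detach())
        print(csr.grad)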