forked from PaddlePaddle/Paddle
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NPU] Support npu op pow and pow grad (PaddlePaddle#31247)
* [NPU] Support npu op: (1) pow (2) pow_grad * Support fp16
- Loading branch information
1 parent
0592003
commit 5a69ab8
Showing
3 changed files
with
295 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#ifdef PADDLE_WITH_ASCEND_CL | ||
#include <memory> | ||
#include <string> | ||
|
||
#include "paddle/fluid/framework/ddim.h" | ||
#include "paddle/fluid/framework/tensor_util.h" | ||
#include "paddle/fluid/operators/activation_op.h" | ||
#include "paddle/fluid/operators/npu_op_runner.h" | ||
|
||
namespace paddle { | ||
namespace operators { | ||
|
||
using Tensor = framework::Tensor; | ||
|
||
template <typename DeviceContext, typename T> | ||
class PowNPUKernel : public framework::OpKernel<T> { | ||
public: | ||
void Compute(const framework::ExecutionContext& ctx) const override { | ||
auto* x = ctx.Input<Tensor>("X"); | ||
auto* out = ctx.Output<Tensor>("Out"); | ||
auto factor = ctx.Attr<float>("factor"); | ||
|
||
out->mutable_data<T>(ctx.GetPlace()); | ||
|
||
auto runner = NpuOpRunner("Power", {*x}, {*out}, | ||
{{"power", factor}, | ||
{"scale", static_cast<float>(1.0)}, | ||
{"shift", static_cast<float>(0.0)}}); | ||
|
||
auto stream = | ||
ctx.template device_context<paddle::platform::NPUDeviceContext>() | ||
.stream(); | ||
runner.Run(stream); | ||
} | ||
}; | ||
|
||
template <typename DeviceContext, typename T> | ||
class PowGradNPUKernel : public framework::OpKernel<T> { | ||
public: | ||
void Compute(const framework::ExecutionContext& ctx) const override { | ||
auto* x = ctx.Input<Tensor>("X"); | ||
auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out")); | ||
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X")); | ||
auto factor = ctx.Attr<float>("factor"); | ||
|
||
auto x_dims = x->dims(); | ||
|
||
auto place = ctx.GetPlace(); | ||
auto stream = | ||
ctx.template device_context<paddle::platform::NPUDeviceContext>() | ||
.stream(); | ||
|
||
// NOTE(liym27): dx = dout * factor * x.pow(factor-1) | ||
|
||
// Step1: Compute x_pow = x.pow(factor-1) | ||
Tensor x_pow(x->type()); | ||
x_pow.mutable_data<T>(x->dims(), place); | ||
auto runner_pow = NpuOpRunner("Power", {*x}, {x_pow}, | ||
{{"power", factor - static_cast<float>(1)}}); | ||
runner_pow.Run(stream); | ||
|
||
// Step 2: Construct a broadcast factor, which has the same shape with x. | ||
// 2.1 Get the shape of x | ||
Tensor x_shape(framework::proto::VarType::INT32); | ||
x_shape.mutable_data<int32_t>({x_dims.size()}, place); | ||
TensorFromVector(framework::vectorize<int32_t>(x_dims), | ||
ctx.device_context(), &x_shape); | ||
|
||
// 2.2 Get a factor tensor with shape [1]. | ||
Tensor factor_tensor(framework::proto::VarType::FP32); | ||
factor_tensor.mutable_data<float>({1}, place); | ||
TensorFromVector(std::vector<float>{factor}, ctx.device_context(), | ||
&factor_tensor); | ||
|
||
// 2.3 Get the factor which has the shape with x and the same value with | ||
// factor. | ||
Tensor factor_bc_tensor(framework::proto::VarType::FP32); | ||
factor_bc_tensor.mutable_data<float>(x_dims, place); | ||
auto runner_bc = NpuOpRunner("BroadcastTo", {factor_tensor, x_shape}, | ||
{factor_bc_tensor}, {}); | ||
runner_bc.Run(stream); | ||
|
||
// Step 3: Compute x_power_mul_factor = factor * x.pow(factor-1) | ||
Tensor x_power_mul_factor(x->type()); | ||
x_power_mul_factor.mutable_data<T>(x->dims(), place); | ||
auto runner_mul_1 = | ||
NpuOpRunner("Mul", {factor_bc_tensor, *x}, {x_power_mul_factor}, {}); | ||
runner_mul_1.Run(stream); | ||
|
||
// Step 4: Compute dx = dout * factor * x.pow(factor-1) | ||
dx->mutable_data<T>(place); | ||
auto runner_mul_2 = | ||
NpuOpRunner("Mul", {*dout, x_power_mul_factor}, {*dx}, {}); | ||
runner_mul_2.Run(stream); | ||
} | ||
}; | ||
|
||
} // namespace operators | ||
} // namespace paddle | ||
|
||
namespace ops = paddle::operators; | ||
|
||
REGISTER_OP_NPU_KERNEL( | ||
pow, ops::PowNPUKernel<paddle::platform::NPUDeviceContext, float>, | ||
ops::PowNPUKernel<paddle::platform::NPUDeviceContext, | ||
paddle::platform::float16>); | ||
|
||
REGISTER_OP_NPU_KERNEL( | ||
pow_grad, ops::PowGradNPUKernel<paddle::platform::NPUDeviceContext, float>, | ||
ops::PowGradNPUKernel<paddle::platform::NPUDeviceContext, | ||
paddle::platform::float16>); | ||
|
||
#endif |
152 changes: 152 additions & 0 deletions
152
python/paddle/fluid/tests/unittests/npu/test_pow_op_npu.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from __future__ import print_function | ||
|
||
import numpy as np | ||
import unittest | ||
import sys | ||
sys.path.append("..") | ||
from op_test import OpTest | ||
import paddle | ||
import paddle.fluid as fluid | ||
|
||
# The tests in this module build static-graph programs, so static mode is
# switched on once at import time.
paddle.enable_static()

# Seed used for numpy's RNG and for the static programs' random_seed so that
# runs are reproducible.
SEED = 2021
|
||
|
||
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestPow(OpTest):
    """Compares the NPU ``pow`` op (factor 3.0) with ``np.power`` in float32."""

    def setUp(self):
        self.set_npu()
        self.op_type = "pow"
        self.place = paddle.NPUPlace(0)
        self.init_dtype()

        # Inputs are drawn from [1, 2) with a fixed seed.
        np.random.seed(SEED)
        base = np.random.uniform(1, 2, [11, 17]).astype(self.dtype)
        expected = np.power(base, 3)

        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(base)}
        self.attrs = {'factor': 3.0}
        self.outputs = {'Out': expected}

    def set_npu(self):
        # Flag is set on the class, not the instance.
        self.__class__.use_npu = True

    def init_dtype(self):
        self.dtype = np.float32

    def test_check_output(self):
        self.check_output_with_place(self.place, check_dygraph=False)

    # TODO(ascendrc): Add grad test
    # def test_check_grad(self):
    #     if self.dtype == np.float16:
    #         return
    #     self.check_grad(['X'], 'Out')
    #
|
||
|
||
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestPowFp16(OpTest):
    """Compares the NPU ``pow`` op (factor 3.0) with ``np.power`` in float16."""

    def setUp(self):
        self.set_npu()
        self.op_type = "pow"
        self.place = paddle.NPUPlace(0)
        self.init_dtype()

        # Inputs are drawn from [1, 2) with a fixed seed, then cast to float16.
        np.random.seed(SEED)
        base = np.random.uniform(1, 2, [11, 17]).astype(self.dtype)
        expected = np.power(base, 3)

        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(base)}
        self.attrs = {'factor': 3.0}
        self.outputs = {'Out': expected}

    def set_npu(self):
        # Flag is set on the class, not the instance.
        self.__class__.use_npu = True

    def init_dtype(self):
        self.dtype = np.float16

    def test_check_output(self):
        self.check_output_with_place(self.place, check_dygraph=False)
|
||
|
||
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestSubtractNet(unittest.TestCase):
    """Trains a small network containing ``pow`` on CPU and NPU and compares
    the final prediction and loss."""

    def _test(self, run_npu=True):
        # Builds the network, runs 100 SGD steps on the chosen device and
        # returns the (prediction, loss) fetched in the last step.
        main_prog = paddle.static.Program()
        startup_prog = paddle.static.Program()
        main_prog.random_seed = SEED
        startup_prog.random_seed = SEED
        np.random.seed(SEED)

        a_np = np.random.random(size=(32, 32)).astype('float32')
        b_np = np.random.random(size=(32, 32)).astype('float32')
        label_np = np.random.randint(2, size=(32, 1)).astype('int64')

        with paddle.static.program_guard(main_prog, startup_prog):
            a = paddle.static.data(name="a", shape=[32, 32], dtype='float32')
            b = paddle.static.data(name="b", shape=[32, 32], dtype='float32')
            label = paddle.static.data(
                name="label", shape=[32, 1], dtype='int64')

            a_plus_b = paddle.add(a, b)
            squared = paddle.pow(a_plus_b, 2.0)

            hidden = fluid.layers.fc(input=squared, size=128)
            prediction = fluid.layers.fc(input=hidden, size=2, act='softmax')

            cost = fluid.layers.cross_entropy(input=prediction, label=label)
            loss = fluid.layers.reduce_mean(cost)
            optimizer = fluid.optimizer.SGD(learning_rate=0.01)
            optimizer.minimize(loss)

        place = paddle.NPUPlace(0) if run_npu else paddle.CPUPlace()

        exe = paddle.static.Executor(place)
        exe.run(startup_prog)

        # The same batch is fed on every step.
        feed = {"a": a_np, "b": b_np, "label": label_np}
        for epoch in range(100):
            pred_res, loss_res = exe.run(
                main_prog, feed=feed, fetch_list=[prediction, loss])
            if epoch % 10 == 0:
                print("Epoch {} | Prediction[0]: {}, Loss: {}".format(
                    epoch, pred_res[0], loss_res))

        return pred_res, loss_res

    def test_npu(self):
        cpu_pred, cpu_loss = self._test(False)
        npu_pred, npu_loss = self._test(True)

        self.assertTrue(np.allclose(npu_pred, cpu_pred))
        self.assertTrue(np.allclose(npu_loss, cpu_loss))
|
||
|
||
# Run the module's test cases when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()