Skip to content

Commit

Permalink
refine: move FillNpuTensorWithConstant definition into npu_op_runner.h and fix its implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
zhiqiu committed Mar 31, 2021
1 parent f4ec991 commit e5d847a
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 35 deletions.
35 changes: 1 addition & 34 deletions paddle/fluid/operators/npu_op_runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ aclFormat ConvertToNpuFormat(DataLayout layout) {
return iter->second;
}

aclrtStream GetCurrentNPUStream(int device_id = -1) {
aclrtStream GetCurrentNPUStream(int device_id) {
if (device_id == -1) {
device_id = platform::GetCurrentNPUDeviceId();
}
Expand Down Expand Up @@ -302,38 +302,5 @@ void NpuOpRunner::Run(aclrtStream stream) {
PADDLE_ENFORCE_NPU_SUCCESS(ret);
}

template <typename T>
/// Fills every element of `tensor` with `val`.
/// @param tensor  must already be initialized and allocated on an NPUPlace
/// @param val     constant to broadcast into the tensor
/// For float/float16 the fill is done asynchronously on the device via the
/// "Power" op (shift == val, scale == 0); for other types a host buffer is
/// filled and copied to the device synchronously.
void FillNpuTensorWithConstant(Tensor *tensor, T val) {
  PADDLE_ENFORCE_EQ(
      tensor->IsInitialized(), true,
      platform::errors::InvalidArgument("The tensor should be initialized."));
  PADDLE_ENFORCE_EQ(
      platform::is_npu_place(tensor->place()), true,
      platform::errors::InvalidArgument("The tensor should be on NPUPlace."));
  // do async for better performance
  if (typeid(float) == typeid(T) || typeid(platform::float16) == typeid(T)) {
    Tensor tmp(tensor->type());
    tmp.Resize(tensor->dims());
    // BUGFIX: was `tmp.mutable_data<T>(place)` — `place` was never declared;
    // allocate the scratch tensor on the same place as the output.
    tmp.mutable_data<T>(tensor->place());
    // BUGFIX: was `GetCurrentNPUStream(tmp.device)` — Tensor has no `device`
    // member; derive the device id from the tensor's place instead.
    auto stream = GetCurrentNPUStream(
        BOOST_GET_CONST(platform::NPUPlace, tensor->place()).device);
    platform::NPUMemsetAsync(tmp.data<void>(), 0, tmp.numel() * sizeof(T),
                             stream);
    // y = power(x * scale + shift) = (0 * 0 + val)^1 == val for every element.
    auto runner = NpuOpRunner("Power", {tmp}, {*tensor},
                              {{"power", static_cast<float>(1)},
                               {"scale", static_cast<float>(0)},
                               {"shift", static_cast<float>(val)}});
    // BUGFIX: the runner was constructed as a discarded temporary and the op
    // was never launched; it must be explicitly run on the stream.
    runner.Run(stream);
  } else {
    // BUGFIX: the old code filled both a raw `new T[]` array and an unused
    // vector, copied into the out-of-scope `tmp`, and passed an undeclared
    // `size`. One host vector is the buffer; copy into `tensor` itself.
    std::vector<T> host_buf(tensor->numel(), static_cast<T>(val));
    // do sync copy
    memory::Copy(BOOST_GET_CONST(platform::NPUPlace, tensor->place()),
                 tensor->data<void>(), platform::CPUPlace(), host_buf.data(),
                 tensor->numel() * sizeof(T), nullptr);
  }
}

} // namespace operators
} // namespace paddle
37 changes: 36 additions & 1 deletion paddle/fluid/operators/npu_op_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,43 @@ class NpuOpRunner {

aclDataType ConvertToNpuDtype(framework::proto::VarType::Type dtype);

aclrtStream GetCurrentNPUStream(int device_id = -1);

template <typename T>
/// Fills every element of `tensor` with `val`.
/// @param tensor  must already be initialized and allocated on an NPUPlace
/// @param val     constant to broadcast into the tensor
/// Float/float16 fills run asynchronously on the device ("Power" op with
/// shift == val, scale == 0); other types fill a host buffer and copy it to
/// the device synchronously.
void FillNpuTensorWithConstant(Tensor *tensor, T val) {
  PADDLE_ENFORCE_EQ(
      tensor->IsInitialized(), true,
      platform::errors::InvalidArgument("The tensor should be initialized."));
  PADDLE_ENFORCE_EQ(
      platform::is_npu_place(tensor->place()), true,
      platform::errors::InvalidArgument("The tensor should be on NPUPlace."));
  // do async for better performance
  if (typeid(float) == typeid(T) || typeid(platform::float16) == typeid(T)) {
    Tensor tmp(tensor->type());
    tmp.Resize(tensor->dims());
    tmp.mutable_data<T>(tensor->place());
    auto stream = GetCurrentNPUStream(
        BOOST_GET_CONST(platform::NPUPlace, tensor->place()).device);
    platform::NPUMemsetAsync(tmp.data<void>(), 0, tmp.numel() * sizeof(T),
                             stream);
    // y = power(x * scale + shift) = (0 * 0 + val)^1 == val per element.
    auto runner = NpuOpRunner("Power", {tmp}, {*tensor},
                              {{"power", static_cast<float>(1)},
                               {"scale", static_cast<float>(0)},
                               {"shift", static_cast<float>(val)}});
    runner.Run(stream);
  } else {
    // FIX: the previous code allocated a raw `new T[]`, filled it with a
    // signed/unsigned-mismatched loop, AND built an identical but unused
    // std::vector. Use the vector alone as the host buffer (RAII, no
    // delete[] needed).
    std::vector<T> host_buf(tensor->numel(), static_cast<T>(val));
    // do sync copy
    memory::Copy(BOOST_GET_CONST(platform::NPUPlace, tensor->place()),
                 tensor->data<void>(), platform::CPUPlace(), host_buf.data(),
                 tensor->numel() * sizeof(T), nullptr);
  }
}

} // namespace operators
} // namespace paddle
Expand Down

0 comments on commit e5d847a

Please sign in to comment.