[Fluid] move lars_momentum to phi (PaddlePaddle#55798)
* [Fluid] move lars_momentum to phi
* add sig
* fix optional Output
* off check_dygraph
* fix input
* fix operator[]
* fix
* try fix AllocateTmpTensor
* fix
* fix type
* Update paddle/phi/kernels/gpu/lars_momentum_kernel.cu
* fix type
* rollback
* Add Registration
* try fix win
* try fix win
* try use double
* try use operator *(float,const Derived &)
* try auto
* fix
* fix
* fix
* fix dtype
* fix type
* fix index
gouzil authored and BeingGod committed Sep 9, 2023
1 parent 0d39da0 commit 091669f
Showing 8 changed files with 381 additions and 305 deletions.
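
The "add sig" step in the commit list above refers to a compat argument mapping that routes the legacy fluid op onto the new phi kernel. The sketch below shows the general shape of such a mapping; the input/attribute/output names are inferred from the fluid op definition and the phi kernel signature in this diff, and are illustrative rather than a verbatim copy of the file added by this PR.

// Hypothetical sketch of a phi compat signature for lars_momentum.
// Names follow the fluid op (Param/Grad/ParamOut/...); verify against the
// actual sig file in the PR before relying on them.
#include "paddle/phi/core/compat/op_utils.h"

namespace phi {

KernelSignature LarsMomentumOpArgumentMapping(
    const ArgumentMappingContext& ctx) {
  // Inputs, attributes, and outputs listed in the phi kernel's parameter
  // order; MasterParamOut is the optional output the commit list mentions.
  return KernelSignature(
      "lars_momentum",
      {"Param", "Velocity", "LearningRate", "Grad", "MasterParam"},
      {"lars_weight_decay",
       "mu",
       "lars_coeff",
       "epsilon",
       "multi_precision",
       "rescale_grad"},
      {"ParamOut", "VelocityOut", "MasterParamOut"});
}

}  // namespace phi

PD_REGISTER_ARG_MAPPING_FN(lars_momentum, phi::LarsMomentumOpArgumentMapping);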
6 changes: 2 additions & 4 deletions paddle/fluid/operators/optimizers/lars_momentum_op.cc
@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/optimizers/lars_momentum_op.h"
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/framework/op_registry.h"
 
 namespace paddle {
 namespace operators {
@@ -233,6 +234,3 @@ REGISTER_OPERATOR(
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
     ops::LarsMomentumOpVarTypeInference);
-
-PD_REGISTER_STRUCT_KERNEL(
-    lars_momentum, CPU, ALL_LAYOUT, ops::LarsMomentumOpKernel, float, double) {}
74 changes: 0 additions & 74 deletions paddle/fluid/operators/optimizers/lars_momentum_op.h

This file was deleted.

2 changes: 1 addition & 1 deletion paddle/fluid/operators/optimizers/lars_momentum_op_xpu.cc
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #ifdef PADDLE_WITH_XPU
+#include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/optimizers/lars_momentum_op.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 
 namespace paddle {
78 changes: 78 additions & 0 deletions paddle/phi/kernels/cpu/lars_momentum_kernel.cc
@@ -0,0 +1,78 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/lars_momentum_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"

namespace phi {

template <typename T, typename Context>
void LarsMomentumKernel(
    const Context& dev_ctx,
    const std::vector<const DenseTensor*>& param,
    const std::vector<const DenseTensor*>& velocity,
    const std::vector<const DenseTensor*>& learning_rate,
    const std::vector<const DenseTensor*>& grad,
    const paddle::optional<std::vector<const DenseTensor*>>& master_param,
    const std::vector<float>& weight_decay_arr,
    float mu,
    float lars_coeff,
    float epsilon,
    bool multi_precision,
    float rescale_grad,
    std::vector<DenseTensor*> param_out,
    std::vector<DenseTensor*> velocity_out,
    std::vector<DenseTensor*> master_param_out) {
  // The op is merged: it updates a list of (param, velocity, grad) triples,
  // each with its own learning rate and weight decay.
  int op_num = param.size();
  T mu_ = static_cast<T>(mu);
  for (int i = 0; i < op_num; ++i) {
    auto* lr = learning_rate[i]->data<T>();
    T lars_weight_decay = weight_decay_arr[i];
    dev_ctx.template Alloc<T>(param_out[i]);
    dev_ctx.template Alloc<T>(velocity_out[i]);

    auto p_out = phi::EigenVector<T>::Flatten(*(param_out[i]));
    auto v_out = phi::EigenVector<T>::Flatten(*(velocity_out[i]));
    auto p = phi::EigenVector<T>::Flatten(*(param[i]));
    auto v = phi::EigenVector<T>::Flatten(*(velocity[i]));
    // The explicit TensorMap type (rather than auto) appears to work around
    // an MSVC/Eigen expression-template issue; see the "try auto" and
    // "fix type" entries in the commit list.
    Eigen::TensorMap<Eigen::Tensor<const T, 1, 1>> g =
        phi::EigenVector<T>::Flatten(*(grad[i]));
    auto rescale_g = static_cast<T>(rescale_grad) * g;

    // Compute ||p|| and ||rescale_grad * g|| into one-element tensors.
    phi::DenseTensor p_norm_t, g_norm_t;
    p_norm_t.Resize({1});
    g_norm_t.Resize({1});
    dev_ctx.template Alloc<T>(&p_norm_t);
    dev_ctx.template Alloc<T>(&g_norm_t);
    auto ep_norm = phi::EigenScalar<T>::From(p_norm_t);
    auto eg_norm = phi::EigenScalar<T>::From(g_norm_t);
    ep_norm = p.square().sum().sqrt();
    eg_norm = rescale_g.square().sum().sqrt();

    // LARS trust ratio: scale the learning rate by
    // lars_coeff * ||p|| / (||g|| + weight_decay * ||p|| + epsilon).
    T local_lr = lr[0];
    if (lars_weight_decay > 0 && ep_norm(0) > 0 && eg_norm(0) > 0) {
      local_lr = lr[0] * lars_coeff * ep_norm(0) /
                 (eg_norm(0) + lars_weight_decay * ep_norm(0) + epsilon);
    }
    v_out = v * mu_ + local_lr * (rescale_g + lars_weight_decay * p);
    p_out = p - v_out;
  }
}

} // namespace phi

PD_REGISTER_KERNEL(
    lars_momentum, CPU, ALL_LAYOUT, phi::LarsMomentumKernel, float, double) {}
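
For reference, the per-tensor update the CPU kernel computes, transcribed directly from the code above (with $\eta$ the incoming learning rate, $\lambda$ the per-tensor lars_weight_decay, $\mu$ the momentum coefficient, and $\tilde{g}$ the rescaled gradient):

\tilde{g} = \text{rescale\_grad} \cdot g, \qquad
\eta_{\mathrm{local}} =
\begin{cases}
\eta \cdot \text{lars\_coeff} \cdot \dfrac{\lVert p \rVert}{\lVert \tilde{g} \rVert + \lambda \lVert p \rVert + \epsilon}, & \text{if } \lambda > 0,\ \lVert p \rVert > 0,\ \lVert \tilde{g} \rVert > 0,\\
\eta, & \text{otherwise,}
\end{cases}

v \leftarrow \mu v + \eta_{\mathrm{local}} \, (\tilde{g} + \lambda p), \qquad
p \leftarrow p - v.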