add loss scaling (PaddlePaddle#265)
XBWGC authored Nov 4, 2021
1 parent 3f1644b commit d90295e
Showing 8 changed files with 40 additions and 4 deletions.
1 change: 1 addition & 0 deletions paddle/fluid/framework/ipu/common.h
@@ -31,6 +31,7 @@ static constexpr const char *sBeta1 = "beta1";
 static constexpr const char *sBeta2 = "beta2";
 static constexpr const char *sBeta1Pow = "Beta1Pow";
 static constexpr const char *sBeta2Pow = "Beta2Pow";
+static constexpr const char *sLossScaling = "LossScaling";

 }  // namespace ipu
 }  // namespace framework
3 changes: 3 additions & 0 deletions paddle/fluid/framework/ipu/ipu_executor.cc
@@ -39,6 +39,9 @@ void Executor::Prepare(const std::string &proto,
                           "IpuBackend::AttachDevice(id) first."));

   if (ipu_strategy_ != nullptr && ipu_strategy_->is_training) {
+    VLOG(10) << "Set Loss Scaling for optimizer...";
+    SetOptimizerAttr(sLossScaling, ipu_strategy_->loss_scaling);
+
     VLOG(10) << "Creating TrainingSession from Onnx Model...";
     auto popart_optimizer = GetPopartOptimizer(opt_info);
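In effect, Prepare() now copies the strategy's loss_scaling into the optimizer attribute map under the sLossScaling key ("LossScaling") before the PopART optimizer is built; the getter added to ipu_optimizer.cc below falls back to 1.0 (no scaling) when the attribute is absent. A tiny Python sketch of that lookup semantics (illustrative only, not Paddle API):

def get_loss_scaling(attrs):
    # Mirrors OptmizerMetaInfo::GetLossScaling: 1.0 (no scaling) when unset.
    return attrs.get("LossScaling", 1.0)

attrs = {}
print(get_loss_scaling(attrs))        # 1.0 -- default, scaling disabled
attrs["LossScaling"] = 8.0            # what SetOptimizerAttr stores
print(get_loss_scaling(attrs))        # 8.0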
13 changes: 10 additions & 3 deletions paddle/fluid/framework/ipu/ipu_optimizer.cc
@@ -46,6 +46,13 @@ float OptmizerMetaInfo::GetWeightDecay() const {
   }
 }

+float OptmizerMetaInfo::GetLossScaling() const {
+  if (attrs_.count(sLossScaling) != 0) {
+    return attrs_.at(sLossScaling);
+  }
+  return 1.0f;
+}
+
 popart::WeightDecayMode OptmizerMetaInfo::GetWeightDecayMode() const {
   if (type_ == OptimizerType::Adam) {
     if (attrs_.count("scale") != 0 && attrs_.at("scale") > 0.0f) {
@@ -90,7 +97,7 @@ std::unique_ptr<popart::Optimizer> GetPopartOptimizer(
         popart::OptimizerValue(popart::SGD::getUnsetMomentum()),
         popart::OptimizerValue(popart::SGD::getUnsetDampening()),
         popart::OptimizerValue(popart::SGD::getUnsetVelocityScaling()),
-        popart::OptimizerValue(popart::SGD::getUnsetLossScaling()));
+        popart::OptimizerValue(opt_meta_info.GetLossScaling(), false));
     return optimizer;
   } else if (opt_type == OptimizerType::Adam) {
     auto optimizer = std::make_unique<popart::Adam>(
@@ -99,7 +106,7 @@ std::unique_ptr<popart::Optimizer> GetPopartOptimizer(
         popart::OptimizerValue(opt_meta_info.GetAttr("beta1"), false),
         popart::OptimizerValue(opt_meta_info.GetAttr("beta2"), false),
         popart::OptimizerValue(opt_meta_info.GetAttr("epsilon"), false),
-        popart::OptimizerValue(popart::Adam::getUnsetLossScaling()),
+        popart::OptimizerValue(opt_meta_info.GetLossScaling(), false),
         popart::AdamMode::Adam, opt_meta_info.GetWeightDecayMode(),
         popart::DataType::UNDEFINED, popart::DataType::FLOAT,
         popart::DataType::FLOAT);
@@ -111,7 +118,7 @@ std::unique_ptr<popart::Optimizer> GetPopartOptimizer(
         popart::OptimizerValue(opt_meta_info.GetAttr("beta1"), false),
         popart::OptimizerValue(opt_meta_info.GetAttr("beta2"), false),
         popart::OptimizerValue(opt_meta_info.GetAttr("epsilon"), false),
-        popart::OptimizerValue(popart::Adam::getUnsetLossScaling()),
+        popart::OptimizerValue(opt_meta_info.GetLossScaling(), false),
         popart::AdamMode::Lamb, opt_meta_info.GetWeightDecayMode(),
         popart::DataType::UNDEFINED, popart::DataType::FLOAT,
         popart::DataType::FLOAT);
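Each optimizer now receives the configured value instead of PopART's unset placeholder; the second OptimizerValue argument (false) marks the value as non-constant, so it may be updated later. Loss scaling matters for mixed-precision training because small fp16 gradients underflow to zero: multiplying the loss (and therefore every gradient) by a constant keeps them representable, and the optimizer divides the constant back out before applying updates. A short NumPy illustration of the underflow this avoids (values are illustrative):

import numpy as np

g = 1e-8                              # a gradient this small...
print(np.float16(g))                  # 0.0 -- underflows in fp16

scale = 32768.0                       # loss-scaling constant
g_scaled = np.float16(g * scale)      # ~3.28e-4, representable in fp16
print(float(g_scaled) / scale)        # ~1e-8, recovered after unscaling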
2 changes: 2 additions & 0 deletions paddle/fluid/framework/ipu/ipu_optimizer.h
@@ -20,6 +20,7 @@ limitations under the License. */
 #include <popart/sgd.hpp>
 #include <popart/tensorinfo.hpp>

+#include "paddle/fluid/framework/ipu/common.h"
 #include "paddle/fluid/platform/enforce.h"

 namespace paddle {
@@ -52,6 +53,7 @@ class OptmizerMetaInfo {
   popart::DataType GetDType() const { return dtype_; }

   float GetWeightDecay() const;
+  float GetLossScaling() const;
   popart::WeightDecayMode GetWeightDecayMode() const;

 private:
4 changes: 4 additions & 0 deletions paddle/fluid/framework/ipu/ipu_strategy.h
@@ -74,6 +74,10 @@ struct IpuStrategy {
   // available memory proportion, 0.0f for disable
   float available_memory_proportion = 0.0f;

+  // loss scaling, currently we can't get loss scaling from
+  // optimizer_extract_pass, so we have to set it here
+  float loss_scaling = 1.0f;
+
   // popart session option
   popart::SessionOptions popart_options;
   popart::Patterns popart_patterns;
9 changes: 9 additions & 0 deletions paddle/fluid/pybind/pybind.cc
@@ -3390,6 +3390,15 @@ All parameter, weight, gradient are variables in Paddle.
           R"DOC(
 Float type. Set the available memory proportion for matmul/conv, bigger value
 means more memory occupy, range [0.0f, 1.0f], 0.0 no effect, default 0.0f.
+)DOC")
+      .def_property(
+          "loss_scaling",
+          [](const ipu::IpuStrategy &self) { return self.loss_scaling; },
+          [](ipu::IpuStrategy &self, float loss_scaling) {
+            self.loss_scaling = loss_scaling;
+          },
+          R"DOC(
+Float type. Set the loss scaling for mixed-precision training. Default 1.0f.
 )DOC");

   py::class_<framework::ipu::IpuCustomOpIdentifier>(m, "IpuCustomOpIdentifier")
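With the property exposed through pybind, loss scaling is enabled from Python exactly as in the unit tests below. A minimal sketch, assuming `compiler` is the helper module the tests import (providing get_ipu_strategy() and IpuCompiler) and that main_prog, feed_list, and fetch_list are already built:

ipu_strategy = compiler.get_ipu_strategy()
ipu_strategy.is_training = True
ipu_strategy.loss_scaling = 8.0  # loss is multiplied by 8x during backprop

program = compiler.IpuCompiler(
    main_prog, ipu_strategy=ipu_strategy).compile(feed_list, fetch_list)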
@@ -77,6 +77,10 @@ def test_training(self):
         assert ipu_strategy.available_mem_proportion == 0.5, \
             "Set available_mem_proportion Failed"

+        ipu_strategy.loss_scaling = 5.0
+        assert ipu_strategy.loss_scaling == 5.0, \
+            "Set loss_scaling Failed"
+

 @unittest.skipIf(not paddle.is_compiled_with_ipu(),
                  "core is not compiled with IPU")
@@ -55,6 +55,7 @@ def set_attrs(self):
         self.attrs = {
             "optimizer": 'sgd',
             "weight_decay": 0.0,
+            "loss_scaling": 1.0,
         }

     def _test_optimizer(self, run_ipu=True):
@@ -97,6 +98,7 @@ def _test_optimizer(self, run_ipu=True):
             fetch_list = [loss.name]
             ipu_strategy = compiler.get_ipu_strategy()
             ipu_strategy.is_training = True
+            ipu_strategy.loss_scaling = self.attrs["loss_scaling"]
             program = compiler.IpuCompiler(
                 main_prog, ipu_strategy=ipu_strategy).compile(feed_list,
                                                               fetch_list)
@@ -118,12 +120,12 @@ def test(self):
         self.assertTrue(np.allclose(ipu_loss, cpu_loss, atol=self.atol))


-@unittest.skip('unsupported currently')
 class TestSGD(TestBase):
     def set_attrs(self):
         self.attrs = {
             "optimizer": 'sgd',
             "weight_decay": 0.1,
+            "loss_scaling": 2.0,
         }


@@ -132,6 +134,7 @@ def set_attrs(self):
         self.attrs = {
             "optimizer": 'adam',
             "weight_decay": 0.1,
+            "loss_scaling": 3.0,
         }


@@ -140,6 +143,7 @@ def set_attrs(self):
         self.attrs = {
             "optimizer": 'adam',
             "weight_decay": 0.0,
+            "loss_scaling": 4.0,
         }


@@ -149,6 +153,7 @@ def set_attrs(self):
         self.attrs = {
             "optimizer": 'lamb',
             "weight_decay": 0.0,
+            "loss_scaling": 5.0,
         }


@@ -158,6 +163,7 @@ def set_attrs(self):
         self.attrs = {
             "optimizer": 'lamb',
             "weight_decay": 0.1,
+            "loss_scaling": 6.0,
         }
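Note that the previously skipped TestSGD case is re-enabled, and each optimizer variant now trains with a different scaling constant yet is still compared against the unscaled CPU baseline via np.allclose. That works because the gradients are divided by the same constant before the weight update, so in sufficient precision the scaled and unscaled updates coincide; a minimal NumPy sketch (illustrative values, not from the test):

import numpy as np

w = np.float32(0.5)                       # weight
g = np.float32(0.02)                      # true gradient
lr, scale = np.float32(0.1), np.float32(8.0)

w_plain = w - lr * g                      # ordinary SGD step
w_scaled = w - lr * (g * scale) / scale   # gradient scaled, then unscaled
print(np.allclose(w_plain, w_scaled))     # True: the updates coincide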
