From d90295eb201698210913dce17335afe21607167e Mon Sep 17 00:00:00 2001 From: XBWGC <67684278+XBWGC@users.noreply.github.com> Date: Thu, 4 Nov 2021 09:17:35 +0800 Subject: [PATCH] add loss scaling (#265) --- paddle/fluid/framework/ipu/common.h | 1 + paddle/fluid/framework/ipu/ipu_executor.cc | 3 +++ paddle/fluid/framework/ipu/ipu_optimizer.cc | 13 ++++++++++--- paddle/fluid/framework/ipu/ipu_optimizer.h | 2 ++ paddle/fluid/framework/ipu/ipu_strategy.h | 4 ++++ paddle/fluid/pybind/pybind.cc | 9 +++++++++ .../tests/unittests/ipu/test_ipu_strategy_ipu.py | 4 ++++ .../fluid/tests/unittests/ipu/test_optimizer_ipu.py | 8 +++++++- 8 files changed, 40 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/ipu/common.h b/paddle/fluid/framework/ipu/common.h index 8ac30fd672c7e..30dbc14b51e5c 100644 --- a/paddle/fluid/framework/ipu/common.h +++ b/paddle/fluid/framework/ipu/common.h @@ -31,6 +31,7 @@ static constexpr const char *sBeta1 = "beta1"; static constexpr const char *sBeta2 = "beta2"; static constexpr const char *sBeta1Pow = "Beta1Pow"; static constexpr const char *sBeta2Pow = "Beta2Pow"; +static constexpr const char *sLossScaling = "LossScaling"; } // namespace ipu } // namespace framework diff --git a/paddle/fluid/framework/ipu/ipu_executor.cc b/paddle/fluid/framework/ipu/ipu_executor.cc index 6e6a520aafdc3..80686bcf2eaa3 100644 --- a/paddle/fluid/framework/ipu/ipu_executor.cc +++ b/paddle/fluid/framework/ipu/ipu_executor.cc @@ -39,6 +39,9 @@ void Executor::Prepare(const std::string &proto, "IpuBackend::AttachDevice(id) first.")); if (ipu_strategy_ != nullptr && ipu_strategy_->is_training) { + VLOG(10) << "Set Loss Scaling for optimizer..."; + SetOptimizerAttr(sLossScaling, ipu_strategy_->loss_scaling); + VLOG(10) << "Creating TrainingSession from Onnx Model..."; auto popart_optimizer = GetPopartOptimizer(opt_info); diff --git a/paddle/fluid/framework/ipu/ipu_optimizer.cc b/paddle/fluid/framework/ipu/ipu_optimizer.cc index dbda366ef3c66..c41b9258a20f0 100644 --- a/paddle/fluid/framework/ipu/ipu_optimizer.cc +++ b/paddle/fluid/framework/ipu/ipu_optimizer.cc @@ -46,6 +46,13 @@ float OptmizerMetaInfo::GetWeightDecay() const { } } +float OptmizerMetaInfo::GetLossScaling() const { + if (attrs_.count(sLossScaling) != 0) { + return attrs_.at(sLossScaling); + } + return 1.0f; +} + popart::WeightDecayMode OptmizerMetaInfo::GetWeightDecayMode() const { if (type_ == OptimizerType::Adam) { if (attrs_.count("scale") != 0 && attrs_.at("scale") > 0.0f) { @@ -90,7 +97,7 @@ std::unique_ptr GetPopartOptimizer( popart::OptimizerValue(popart::SGD::getUnsetMomentum()), popart::OptimizerValue(popart::SGD::getUnsetDampening()), popart::OptimizerValue(popart::SGD::getUnsetVelocityScaling()), - popart::OptimizerValue(popart::SGD::getUnsetLossScaling())); + popart::OptimizerValue(opt_meta_info.GetLossScaling(), false)); return optimizer; } else if (opt_type == OptimizerType::Adam) { auto optimizer = std::make_unique( @@ -99,7 +106,7 @@ std::unique_ptr GetPopartOptimizer( popart::OptimizerValue(opt_meta_info.GetAttr("beta1"), false), popart::OptimizerValue(opt_meta_info.GetAttr("beta2"), false), popart::OptimizerValue(opt_meta_info.GetAttr("epsilon"), false), - popart::OptimizerValue(popart::Adam::getUnsetLossScaling()), + popart::OptimizerValue(opt_meta_info.GetLossScaling(), false), popart::AdamMode::Adam, opt_meta_info.GetWeightDecayMode(), popart::DataType::UNDEFINED, popart::DataType::FLOAT, popart::DataType::FLOAT); @@ -111,7 +118,7 @@ std::unique_ptr GetPopartOptimizer( popart::OptimizerValue(opt_meta_info.GetAttr("beta1"), false), popart::OptimizerValue(opt_meta_info.GetAttr("beta2"), false), popart::OptimizerValue(opt_meta_info.GetAttr("epsilon"), false), - popart::OptimizerValue(popart::Adam::getUnsetLossScaling()), + popart::OptimizerValue(opt_meta_info.GetLossScaling(), false), popart::AdamMode::Lamb, opt_meta_info.GetWeightDecayMode(), popart::DataType::UNDEFINED, popart::DataType::FLOAT, popart::DataType::FLOAT); diff --git a/paddle/fluid/framework/ipu/ipu_optimizer.h b/paddle/fluid/framework/ipu/ipu_optimizer.h index 29ae1cbc91ce3..8d9e06603a849 100644 --- a/paddle/fluid/framework/ipu/ipu_optimizer.h +++ b/paddle/fluid/framework/ipu/ipu_optimizer.h @@ -20,6 +20,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/ipu/common.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -52,6 +53,7 @@ class OptmizerMetaInfo { popart::DataType GetDType() const { return dtype_; } float GetWeightDecay() const; + float GetLossScaling() const; popart::WeightDecayMode GetWeightDecayMode() const; private: diff --git a/paddle/fluid/framework/ipu/ipu_strategy.h b/paddle/fluid/framework/ipu/ipu_strategy.h index ae3cd643be3e2..e73e30b242f1a 100644 --- a/paddle/fluid/framework/ipu/ipu_strategy.h +++ b/paddle/fluid/framework/ipu/ipu_strategy.h @@ -74,6 +74,10 @@ struct IpuStrategy { // available memory proportion, 0.0f for disable float available_memory_proportion = 0.0f; + // loss scaling, currently we can't get loss scaling from + // optimizer_extract_pass, so we have to set it here + float loss_scaling = 1.0f; + // popart session option popart::SessionOptions popart_options; popart::Patterns popart_patterns; diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 24c84827b89e3..e85ad455aa564 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -3390,6 +3390,15 @@ All parameter, weight, gradient are variables in Paddle. R"DOC( Float type. Set the available memory proportion for matmul/conv, bigger value means more memory occupy, range [0.0f, 1.0f], 0.0 no effect, default 0.0f. + )DOC") + .def_property( + "loss_scaling", + [](const ipu::IpuStrategy &self) { return self.loss_scaling; }, + [](ipu::IpuStrategy &self, float loss_scaling) { + self.loss_scaling = loss_scaling; + }, + R"DOC( + Float type. Set the loss scaling for mixed-precision training. Default 1.0f. )DOC"); py::class_(m, "IpuCustomOpIdentifier") diff --git a/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py index ee44635959a43..20fa8d79536ab 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py @@ -77,6 +77,10 @@ def test_training(self): assert ipu_strategy.available_mem_proportion == 0.5, \ "Set available_mem_proportion Failed" + ipu_strategy.loss_scaling = 5.0 + assert ipu_strategy.loss_scaling == 5.0, \ + "Set loss_scaling Failed" + @unittest.skipIf(not paddle.is_compiled_with_ipu(), "core is not compiled with IPU") diff --git a/python/paddle/fluid/tests/unittests/ipu/test_optimizer_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_optimizer_ipu.py index 03a5c2e661a70..3266e296aef78 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_optimizer_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_optimizer_ipu.py @@ -55,6 +55,7 @@ def set_attrs(self): self.attrs = { "optimizer": 'sgd', "weight_decay": 0.0, + "loss_scaling": 1.0, } def _test_optimizer(self, run_ipu=True): @@ -97,6 +98,7 @@ def _test_optimizer(self, run_ipu=True): fetch_list = [loss.name] ipu_strategy = compiler.get_ipu_strategy() ipu_strategy.is_training = True + ipu_strategy.loss_scaling = self.attrs["loss_scaling"] program = compiler.IpuCompiler( main_prog, ipu_strategy=ipu_strategy).compile(feed_list, fetch_list) @@ -118,12 +120,12 @@ def test(self): self.assertTrue(np.allclose(ipu_loss, cpu_loss, atol=self.atol)) -@unittest.skip('unsupported currently') class TestSGD(TestBase): def set_attrs(self): self.attrs = { "optimizer": 'sgd', "weight_decay": 0.1, + "loss_scaling": 2.0, } @@ -132,6 +134,7 @@ def set_attrs(self): self.attrs = { "optimizer": 'adam', "weight_decay": 0.1, + "loss_scaling": 3.0, } @@ -140,6 +143,7 @@ def set_attrs(self): self.attrs = { "optimizer": 'adam', "weight_decay": 0.0, + "loss_scaling": 4.0, } @@ -149,6 +153,7 @@ def set_attrs(self): self.attrs = { "optimizer": 'lamb', "weight_decay": 0.0, + "loss_scaling": 5.0, } @@ -158,6 +163,7 @@ def set_attrs(self): self.attrs = { "optimizer": 'lamb', "weight_decay": 0.1, + "loss_scaling": 6.0, }