Commit

add batches_per_step(python side) (PaddlePaddle#90)
* add SetIpuIndexStage for model sharding/pipelining

* add batches_per_step
XBWGC authored Aug 25, 2021
1 parent ac5115a commit a871ac1
Showing 6 changed files with 34 additions and 15 deletions.
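
A hedged sketch of the intended Python-side usage: the three properties below are exactly the ones this commit binds in paddle/fluid/pybind/pybind.cc; the paddle.fluid.core import path for IpuStrategy is an assumption, since the commit only shows the strategy struct, the bindings, and part of python/paddle/fluid/compiler.py.

# Usage sketch; the core.IpuStrategy constructor path is an assumption,
# only the three properties below come from this commit.
from paddle.fluid import core

ipu_strategy = core.IpuStrategy()
ipu_strategy.num_ipus = 2            # number of IPUs to use, default 1
ipu_strategy.batches_per_step = 4    # new in this commit, default 1
ipu_strategy.is_training = False     # True for training, False for inference
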
10 changes: 5 additions & 5 deletions paddle/fluid/framework/ipu/ipu_backend.cc
@@ -105,14 +105,14 @@ void IpuBackend::Prepare() {
for (popart::TensorId item : compiler_->GetOutputs()) {
anchor_ids.push_back(item);
}
- auto dataFlow = popart::DataFlow(1, anchor_ids);
+ auto dataFlow = popart::DataFlow(ipu_strategy_->batches_per_step, anchor_ids);

PADDLE_ENFORCE_NOT_NULL(
curr_device_,
platform::errors::Unavailable("IPU device isn't attached, please call "
"IpuBackend::AttachDevice(id) first."));

- if (ipu_strategy_ != nullptr && ipu_strategy_->is_training_) {
+ if (ipu_strategy_ != nullptr && ipu_strategy_->is_training) {
VLOG(1) << "Creating TrainingSession from Onnx Model...";
auto popart_optimizer = GetPopartOptimizer();
auto tensors = compiler_->GetTensors();
@@ -170,7 +170,7 @@ void IpuBackend::Run(const std::vector<const Tensor*>& inputs,
popart_anchors.emplace(tensor_id, anchor_wrappers.at(tensor_id));
}

- if (ipu_strategy_ != nullptr && ipu_strategy_->is_training_) {
+ if (ipu_strategy_ != nullptr && ipu_strategy_->is_training) {
VLOG(1) << "Update optimizer learning rate...";
auto popart_optimizer = GetPopartOptimizer();
auto session = dynamic_cast<popart::TrainingSession*>(session_.get());
@@ -197,12 +197,12 @@ float IpuBackend::GetLRFromScope() {

// ipu_num_ must be pow(2,n);
int IpuBackend::UpperIpuNum() {
- PADDLE_ENFORCE_GT(ipu_strategy_->num_ipus_, 0,
+ PADDLE_ENFORCE_GT(ipu_strategy_->num_ipus, 0,
platform::errors::Unavailable(
"The ipu num get is wrong, please make sure the "
"sharding or pipline parameter is right."));
int i = 0;
- while (pow(2, i) < ipu_strategy_->num_ipus_) {
+ while (pow(2, i) < ipu_strategy_->num_ipus) {
i++;
}
return pow(2, i);
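
UpperIpuNum() above rounds the configured num_ipus up to the next power of two, since the IPU count used for sharding/pipelining must be pow(2, n). An equivalent Python sketch of that rounding logic, for illustration only (not part of the commit):

# Round a positive IPU count up to the next power of two, mirroring
# IpuBackend::UpperIpuNum() shown above.
def upper_ipu_num(num_ipus):
    assert num_ipus > 0, "sharding/pipeline parameters must give a positive ipu num"
    i = 0
    while 2 ** i < num_ipus:
        i += 1
    return 2 ** i

assert [upper_ipu_num(n) for n in (1, 2, 3, 5, 8)] == [1, 2, 4, 8, 8]
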
5 changes: 3 additions & 2 deletions paddle/fluid/framework/ipu/ipu_strategy.h
@@ -37,8 +37,9 @@ namespace ipu {
using VirtualGraphMode = popart::VirtualGraphMode;

struct IpuStrategy {
- int num_ipus_ = 1;
- bool is_training_ = true;
+ int num_ipus = 1;
+ int batches_per_step = 1;
+ bool is_training = true;
popart::SessionOptions popart_options_;
};

2 changes: 1 addition & 1 deletion paddle/fluid/framework/ipu/supported_ops_custom.h
@@ -32,7 +32,7 @@
auto inputs = GetOpInputs(op_desc); \
auto outputs = op_desc->Output("__outputs__"); \
/*num_outputs training mode 5, inference mode 1*/ \
- auto num_outputs = ipu_strategy_->is_training_ ? 5 : 1; \
+ auto num_outputs = ipu_strategy_->is_training ? 5 : 1; \
auto epsilon = BOOST_GET_CONST(float, op_desc->GetAttr("epsilon")); \
auto momentum = BOOST_GET_CONST(float, op_desc->GetAttr("momentum")); \
auto result = builder_->aiOnnxOpset11().batchnormalization( \
4 changes: 2 additions & 2 deletions paddle/fluid/framework/ir/ipu/inference_extract_pass.cc
@@ -48,8 +48,8 @@ void InferenceExtractPass::ApplyImpl(ir::Graph* graph) const {
static std::shared_ptr<ipu::IpuStrategy> ipu_strategy_instance_(
new ipu::IpuStrategy());

- ipu_strategy_instance_->is_training_ = false;
- ipu_strategy_instance_->num_ipus_ = graph->Get<int>("num_ipus");
+ ipu_strategy_instance_->is_training = false;
+ ipu_strategy_instance_->num_ipus = graph->Get<int>("num_ipus");
ipu_strategy_instance_->popart_options_.enablePipelining =
graph->Get<bool>("enable_pipeline");
auto& enable_sharding = graph->Get<bool>("enable_sharding");
17 changes: 13 additions & 4 deletions paddle/fluid/pybind/pybind.cc
@@ -3204,18 +3204,27 @@ All parameter, weight, gradient are variables in Paddle.
.def(py::init())
.def_property(
"num_ipus",
- [](const ipu::IpuStrategy &self) { return self.num_ipus_; },
+ [](const ipu::IpuStrategy &self) { return self.num_ipus; },
[](ipu::IpuStrategy &self, int num_ipus) {
- self.num_ipus_ = num_ipus;
+ self.num_ipus = num_ipus;
},
R"DOC(
Int type, set the number of IPUs we need. Default 1.
)DOC")
+ .def_property(
+ "batches_per_step",
+ [](const ipu::IpuStrategy &self) { return self.batches_per_step; },
+ [](ipu::IpuStrategy &self, int batches_per_step) {
+ self.batches_per_step = batches_per_step;
+ },
+ R"DOC(
+ Int type, set batches_per_step. Default 1.
+ )DOC")
.def_property(
"is_training",
- [](const ipu::IpuStrategy &self) { return self.is_training_; },
+ [](const ipu::IpuStrategy &self) { return self.is_training; },
[](ipu::IpuStrategy &self, bool is_training) {
- self.is_training_ = is_training;
+ self.is_training = is_training;
},
R"DOC(
Bool type, True for training, False for inference. Default True.
11 changes: 10 additions & 1 deletion python/paddle/fluid/compiler.py
@@ -526,7 +526,7 @@ def __init__(self, program, scope=None, ipu_strategy=None):
"popart_canonicalization_pass"
]

- def compile(self, feed_list, fetch_list, scope=None):
+ def compile(self, feed_list, fetch_list, feed_var_name='feed', scope=None):
for pass_name in self._graph_passes:
graph_pass = core.get_pass(pass_name)
graph_pass.apply(self._graph)
@@ -557,6 +557,15 @@ def compile(self, feed_list, fetch_list, scope=None):
global_block = self._program.global_block()
program.lr_sheduler.lr_var = global_block.vars[lr_var_name]

+ # With popart we need to support batches_per_step, which means the shape of
+ # a feed_var and of the fed tensor (e.g. a numpy array) may not match, so we
+ # set need_check_feed to False. This way we avoid modifying the run logic.
+ program_global_block = program.global_block()
+ for feed_name in feed_list:
+ feed_var = program_global_block.var(feed_name)
+ feed_var.desc.set_need_check_feed(False)

return program
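
Why need_check_feed is switched off, as an illustrative sketch (the shapes below are hypothetical, not from the commit): with batches_per_step greater than one, popart consumes batches_per_step batches per session run, so the host array fed for a variable carries an extra leading batches_per_step dimension and no longer matches the shape recorded in the program desc; skipping the check avoids changing the executor's run logic.

# Illustration of the shape mismatch that the set_need_check_feed(False) loop
# above works around; shapes here are made up.
import numpy as np

batches_per_step, batch_size = 4, 8
declared_shape = (batch_size, 3, 224, 224)   # shape recorded for the feed var
fed = np.zeros((batches_per_step,) + declared_shape, dtype=np.float32)
assert fed.shape != declared_shape           # would fail a strict feed-shape check
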


