diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.cc b/paddle/fluid/platform/device/ipu/ipu_compiler.cc index 7ae3b2303decd..463803dd03f10 100644 --- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc +++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc @@ -185,12 +185,9 @@ void Compiler::RegisterOpFunc() { auto debug_context = BuildDebugContext(op_desc); \ auto aiGraphcoreOpset = builder_->aiGraphcoreOpset1(); \ auto aiOnnxOpset = builder_->aiOnnxOpset11(); \ - PushNameScope(op_desc); \ + NameScopeHelper ns_helper(op_desc, builder_.get()); \ auto output_ids = OnnxImpl(inputs Args, debug_context); \ - PopNameScope(op_desc); \ - SetIpuIndexStage(output_ids, op_desc); \ - SetAMPAttributes(output_ids, op_desc); \ - SetSerializeAttributes(output_ids, op_desc); \ + PostLower(output_ids, op_desc); \ InsertTensors(output_names, output_ids); \ }}, // NOLINT #include "paddle/fluid/platform/device/ipu/supported_ops_autogen.h" @@ -273,10 +270,9 @@ void Compiler::LowerConstants(const Scope* scope) { popart::TensorInfo tensor_info(PdDataType2PopartType(tensor->dtype()), shape); const_data.reset(new popart::ConstVoidData(tensor->data(), tensor_info)); - PushNameScope(op_desc); + NameScopeHelper ns_helper(op_desc, builder_.get()); popart::TensorId result = builder_->aiOnnxOpset11().constant(*const_data); - PopNameScope(op_desc); - SetIpuIndexStage(result, op_desc); + PostLower(result, op_desc); resources_->tensors.emplace(tensor_name, result); } } @@ -285,42 +281,42 @@ void Compiler::LowerConstants(const Scope* scope) { void Compiler::LowerWeights(const Scope* scope) { VLOG(10) << "enter Compiler::LowerWeights"; - // at this step, the graph doesn't contains optimizer related states + // At this step, the graph doesn't contains optimizer related states for (auto id : graph_helper_->sorted_vars_id) { auto* node = graph_helper_->nodes_id_map[id]; - if (node->IsVar() && !node->IsCtrlVar() && node->Var()) { - if (node->Var()->Persistable() && node->inputs.empty()) { - auto var_name = node->Var()->Name(); - if (resources_->tensors.count(var_name) != 0) { - VLOG(10) << "found existed one, skip lowering Weight: " << var_name; - continue; - } - if (var_name.rfind("learning_rate", 0) == 0) { - VLOG(10) << "skip learning_rate_var: " << var_name; - continue; - } - VLOG(10) << "lowering weight: " << var_name; - - auto var = scope->FindVar(var_name); - if (var) { - auto tensor = var->Get(); - auto dtype = PdDataType2PopartType(tensor.dtype()); - auto shape = std::vector(); - for (size_t i = 0; i < tensor.dims().size(); ++i) { - shape.push_back(tensor.dims().at(i)); - } - popart::TensorInfo tensor_info(dtype, shape); - popart::ConstVoidData const_data{tensor.data(), tensor_info}; - if (!node->outputs.empty()) { - auto op_node = node->outputs[0]; - PushNameScope(op_node->Op()); - popart::TensorId result = - builder_->addInitializedInputTensor(const_data, var_name); - PopNameScope(op_node->Op()); - resources_->tensors.emplace(var_name, result); - resources_->weights.push_back(var_name); - } - } + // Weights are var node and Persistable + if (node->IsVar() && !node->IsCtrlVar() && node->Var() && + node->Var()->Persistable()) { + // Weights are Parameter in training mode + if (ipu_strategy_->is_training && !node->Var()->IsParameter()) { + continue; + } + auto var_name = node->Var()->Name(); + // Some op has same input and output tensor, like batchnorm + if (resources_->tensors.count(var_name) != 0) { + VLOG(10) << "found existed one, skip lowering Weight: " << var_name; + continue; + } + VLOG(10) << "lowering 
weight: " << var_name; + auto var = scope->FindVar(var_name); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Tensor %s is not found in the scope", + var_name)); + auto tensor = var->Get(); + auto dtype = PdDataType2PopartType(tensor.dtype()); + auto shape = std::vector(); + for (size_t i = 0; i < tensor.dims().size(); ++i) { + shape.push_back(tensor.dims().at(i)); + } + popart::TensorInfo tensor_info(dtype, shape); + popart::ConstVoidData const_data{tensor.data(), tensor_info}; + if (!node->outputs.empty()) { + auto op_node = node->outputs[0]; + NameScopeHelper ns_helper(op_node->Op(), builder_.get()); + popart::TensorId result = + builder_->addInitializedInputTensor(const_data, var_name); + resources_->tensors.emplace(var_name, result); + resources_->weights.push_back(var_name); } } } @@ -341,10 +337,9 @@ void Compiler::LowerBody() { } else if (op_type == "popart_checkpointoutput") { auto inputs = GetOpInputs(op_desc); auto outputs = GetOpOutputs(op_desc); - PushNameScope(op_desc); + NameScopeHelper ns_helper(op_desc, builder_.get()); auto output_ids = builder_->checkpointOutput(inputs); - PopNameScope(op_desc); - SetIpuIndexStage(output_ids, op_desc); + PostLower(output_ids, op_desc); InsertTensors(outputs, output_ids); } else if (op_type == "popart_custom_op") { auto inputs = GetOpInputs(op_desc); @@ -359,12 +354,11 @@ void Compiler::LowerBody() { BOOST_GET_CONST(std::string, op_desc->GetAttr("__op_type")); VLOG(10) << "Build graph from custom op: " << __op_type; auto it = custom_ops_.find(__op_type); - PushNameScope(op_desc); + NameScopeHelper ns_helper(op_desc, builder_.get()); auto output_ids = builder_->customOp(it->second.popart_op, it->second.popart_op.version, inputs, outputs.size(), attributes, debug_context); - PopNameScope(op_desc); - SetIpuIndexStage(output_ids, op_desc); + PostLower(output_ids, op_desc); InsertTensors(outputs, output_ids); } else if (op_type == "popart_printtensor") { auto inputs = GetOpInputs(op_desc); @@ -373,11 +367,10 @@ void Compiler::LowerBody() { auto print_gradient = BOOST_GET_CONST(int64_t, op_desc->GetAttr("print_gradient")); auto title = BOOST_GET_CONST(std::string, op_desc->GetAttr("title")); - PushNameScope(op_desc); + NameScopeHelper ns_helper(op_desc, builder_.get()); auto output_ids = builder_->aiGraphcoreOpset1().printtensor( inputs, print_gradient, debug_context, title); - PopNameScope(op_desc); - SetIpuIndexStage(output_ids, op_desc); + PostLower(output_ids, op_desc); InsertTensors(outputs, output_ids); } else { auto itr = name_function_.find(op_type); @@ -625,12 +618,13 @@ void Compiler::InsertTensors(const std::vector& output_names, resources_->tensors.emplace(output_names[0], tensor_id); } -void Compiler::SetIpuIndexStage(const std::vector& tensor_ids, - const OpDesc* op_desc) { - VLOG(10) << "enter Compiler::SetIpuIndexStage"; +void Compiler::PostLower(const std::vector& tensor_ids, + const OpDesc* op_desc) { + // Set pipline + // Due to the limitation of popart, if an op has multiple outputs, + // pipline settings needs to be set at the same time auto tensor_ids_set = std::set(tensor_ids.begin(), tensor_ids.end()); - if (op_desc->HasAttr(sIpuIndexAttr)) { auto ipu_index = BOOST_GET_CONST(int, op_desc->GetAttr(sIpuIndexAttr)); builder_->virtualGraph(tensor_ids_set, ipu_index); @@ -639,18 +633,24 @@ void Compiler::SetIpuIndexStage(const std::vector& tensor_ids, if (op_desc->HasAttr(sIpuStageAttr)) { auto ipu_stage = BOOST_GET_CONST(int, op_desc->GetAttr(sIpuStageAttr)); builder_->pipelineStage(tensor_ids_set, ipu_stage); 
- VLOG(10) << "set " << sIpuStageAttr << "= " << ipu_stage + VLOG(10) << "set " << sIpuStageAttr << " = " << ipu_stage << " for op: " << op_desc->Type(); } } - VLOG(10) << "leave Compiler::SetIpuIndexStage"; + + for (auto& tensor_id : tensor_ids) { + PostLower(tensor_id, op_desc, true); + } } -void Compiler::SetIpuIndexStage(const std::string& tensor_id, - const OpDesc* op_desc) { - VLOG(10) << "enter Compiler::SetIpuIndexStage"; +void Compiler::PostLower(const std::string& tensor_id, const OpDesc* op_desc) { + PostLower(tensor_id, op_desc, false); +} - if (op_desc->HasAttr(sIpuIndexAttr)) { +void Compiler::PostLower(const std::string& tensor_id, const OpDesc* op_desc, + bool skip_pipline) { + // Set pipline + if (!skip_pipline && op_desc->HasAttr(sIpuIndexAttr)) { auto ipu_index = BOOST_GET_CONST(int, op_desc->GetAttr(sIpuIndexAttr)); builder_->virtualGraph(tensor_id, ipu_index); VLOG(10) << "set " << sIpuIndexAttr << " = " << ipu_index @@ -658,32 +658,18 @@ void Compiler::SetIpuIndexStage(const std::string& tensor_id, if (op_desc->HasAttr(sIpuStageAttr)) { auto ipu_stage = BOOST_GET_CONST(int, op_desc->GetAttr(sIpuStageAttr)); builder_->pipelineStage(tensor_id, ipu_stage); - VLOG(10) << "set " << sIpuStageAttr << "= " << ipu_stage + VLOG(10) << "set " << sIpuStageAttr << " = " << ipu_stage << " for op: " << op_desc->Type(); } } - VLOG(10) << "leave Compiler::SetIpuIndexStage"; -} - -void Compiler::SetAMPAttributes(const std::vector& tensor_ids, - const OpDesc* op_desc) { - if (op_desc->Type() == "popart_matmul") { - for (const auto& tensor_id : tensor_ids) { - SetAMPAttributes(tensor_id, op_desc); - } - } -} - -void Compiler::SetAMPAttributes(const std::string& tensor_id, - const OpDesc* op_desc) { - VLOG(10) << "enter Compiler::SetAMPAttributes"; + // Set amp if (op_desc->Type() == "popart_matmul") { if (set_amp_for_all_) { auto amp = ipu_strategy_->available_memory_proportion; if (amp < 0.0f || amp > 1.0) { PADDLE_THROW(platform::errors::InvalidArgument( - "AvailableMemoryProportion %f is invalid, which should be set 0 <= " - "amp <= 1", + "AvailableMemoryProportion %f is invalid, which should be in " + "range [0.0, 1.0]", amp)); } if (amp > 0.0f) { @@ -694,8 +680,8 @@ void Compiler::SetAMPAttributes(const std::string& tensor_id, auto amp = BOOST_GET_CONST(float, op_desc->GetAttr(sAvailMemAttribute)); if (amp < 0.0f || amp > 1.0) { PADDLE_THROW(platform::errors::InvalidArgument( - "AvailableMemoryProportion %f is invalid, which should be set 0 " - "<= amp <= 1", + "AvailableMemoryProportion %f is invalid, which should be in " + "range [0.0, 1.0]", amp)); } if (amp > 0.0f) { @@ -705,17 +691,7 @@ void Compiler::SetAMPAttributes(const std::string& tensor_id, } } } - } - VLOG(10) << "leave Compiler::SetAMPAttributes"; -} - -void Compiler::SetSerializeAttributes( - const std::vector& tensor_ids, const OpDesc* op_desc) { - VLOG(10) << "enter Compiler::SetSerializeAttributes"; - auto tensor_ids_set = - std::set(tensor_ids.begin(), tensor_ids.end()); - - if (op_desc->Type() == "popart_matmul") { + // Set serialize matmul if (op_desc->HasAttr(sMatmulSerializeFactor)) { auto factor = BOOST_GET_CONST(int, op_desc->GetAttr(sMatmulSerializeFactor)); @@ -724,16 +700,9 @@ void Compiler::SetSerializeAttributes( mode = BOOST_GET_CONST(std::string, op_desc->GetAttr(sMatmulSerializeMode)); } - builder_->setSerializeMatMul(tensor_ids_set, mode, (int64_t)factor, true); + builder_->setSerializeMatMul({tensor_id}, mode, factor, true); } } - VLOG(10) << "leave Compiler::SetSerializeAttributes"; -} - -void 
Compiler::SetSerializeAttributes(const std::string& tensor_id, - const OpDesc* op_desc) { - std::vector tensor_ids = {tensor_id}; - SetSerializeAttributes(tensor_ids, op_desc); } void Compiler::SetCustomOps( @@ -793,29 +762,6 @@ popart::DebugContext Compiler::BuildDebugContext(const OpDesc* op) { return popart::DebugContext(op_identify_id); } -void Compiler::PushNameScope(const OpDesc* op) { - auto op_namescope = BOOST_GET_CONST(std::string, op->GetAttr(sOpNamescope)); - if (op_namescope == "/") { - return; - } - if (!op_namescope.empty()) { - op_namescope.pop_back(); - } - if (!op_namescope.empty()) { - op_namescope.erase(op_namescope.begin()); - } - VLOG(10) << "name_scope is: " << op_namescope; - builder_->pushNameScope(op_namescope); -} - -void Compiler::PopNameScope(const OpDesc* op) { - auto op_namescope = BOOST_GET_CONST(std::string, op->GetAttr(sOpNamescope)); - if (op_namescope == "/") { - return; - } - builder_->popNameScope(); -} - } // namespace ipu } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.h b/paddle/fluid/platform/device/ipu/ipu_compiler.h index 2d00970bf1297..bf00a453881b7 100644 --- a/paddle/fluid/platform/device/ipu/ipu_compiler.h +++ b/paddle/fluid/platform/device/ipu/ipu_compiler.h @@ -70,7 +70,7 @@ struct CompilerResources { std::unique_ptr optimizer; }; -// helper for lowering graph +// Helper for lowering graph struct GraphHelper { explicit GraphHelper(const Graph *); @@ -81,6 +81,30 @@ struct GraphHelper { std::vector sorted_vars_id; }; +// Helper for adding namescope info +struct NameScopeHelper { + NameScopeHelper(const OpDesc *op, popart::Builder *builder) + : builder_(builder) { + auto op_namescope = BOOST_GET_CONST(std::string, op->GetAttr(sOpNamescope)); + if (op_namescope.empty() || op_namescope == "/") { + return; + } + op_namescope.pop_back(); + op_namescope.erase(op_namescope.begin()); + builder->pushNameScope(op_namescope); + pushed_ = true; + } + + ~NameScopeHelper() { + if (pushed_) { + builder_->popNameScope(); + } + } + + bool pushed_ = false; + popart::Builder *builder_; +}; + class Compiler { public: Compiler(); @@ -119,18 +143,9 @@ class Compiler { const std::vector &tensor_ids); void InsertTensors(const std::vector &output_names, const std::string &tensor_id); - void SetIpuIndexStage(const std::vector &tensor_ids, - const OpDesc *op_desc); - void SetIpuIndexStage(const std::string &tensor_id, const OpDesc *op_desc); - void SetAMPAttributes(const std::vector &tensor_ids, - const OpDesc *op_desc); - void SetAMPAttributes(const std::string &tensor_id, const OpDesc *op_desc); - void SetSerializeAttributes(const std::vector &tensor_ids, - const OpDesc *op_desc); - void SetSerializeAttributes(const std::string &tensor_id, - const OpDesc *op_desc); - void PushNameScope(const OpDesc *op); - void PopNameScope(const OpDesc *op); + void PostLower(const std::vector &, const OpDesc *); + void PostLower(const std::string &, const OpDesc *); + void PostLower(const std::string &, const OpDesc *, bool); private: std::unique_ptr builder_; diff --git a/paddle/fluid/platform/device/ipu/ipu_executor.cc b/paddle/fluid/platform/device/ipu/ipu_executor.cc index 649b291244110..4b8c8286e22e9 100644 --- a/paddle/fluid/platform/device/ipu/ipu_executor.cc +++ b/paddle/fluid/platform/device/ipu/ipu_executor.cc @@ -20,6 +20,40 @@ namespace paddle { namespace platform { namespace ipu { +// Get paddle prefix and popart postfix of weight states +// Format: {popart_postfix, paddle_prefix} +std::vector> GetOptPrePostfix( 
+ const std::string &opt_type) { + std::vector> pre_post_fix; + // Weight self + pre_post_fix.push_back(std::make_pair("", "")); + + // Weight states + // TODO(alleng) support pair("Accl1___", "_moment1_{id!=0}") + if (opt_type == "adam" || opt_type == "lamb" || opt_type == "adamw") { + pre_post_fix.push_back(std::make_pair("Accl1___", "_moment1_0")); + pre_post_fix.push_back(std::make_pair("Accl2___", "_moment2_0")); + pre_post_fix.push_back(std::make_pair("Step___", "_beta1_pow_acc_0")); + } else if (opt_type == "momentum") { + pre_post_fix.push_back(std::make_pair("Accl___", "_velocity_0")); + } else if (opt_type == "adamax") { + pre_post_fix.push_back(std::make_pair("Accl1___", "_moment_0")); + pre_post_fix.push_back(std::make_pair("Accl2___", "_inf_norm__0")); + pre_post_fix.push_back(std::make_pair("Step___", "_beta1_pow_acc_0")); + } else if (opt_type == "adagrad") { + pre_post_fix.push_back(std::make_pair("Accl1___", "_moment_0")); + } else if (opt_type == "adadelta") { + pre_post_fix.push_back(std::make_pair("Accl1___", "__avg_squared_grad_0")); + pre_post_fix.push_back( + std::make_pair("Accl2___", "__avg_squared_update_0")); + } else if (opt_type == "rmsprop") { + pre_post_fix.push_back(std::make_pair("Accl1___", "_mean_square_0")); + pre_post_fix.push_back(std::make_pair("Accl2___", "_mean_grad_0")); + pre_post_fix.push_back(std::make_pair("Accl3___", "_momentum__0")); + } + return pre_post_fix; +} + Executor::~Executor() { Detach(); session_.reset(); diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.cc b/paddle/fluid/platform/device/ipu/ipu_strategy.cc index f52499a8d8fda..c208a0eca5707 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.cc +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.cc @@ -412,6 +412,15 @@ IpuStrategy::IpuStrategy() { RegisterGetter(map_options_getter, options_type, "gcl_options", "map", [&]() { return popart_options.gclOptions; }); + + // Default options + + // Can also be set as a custom logger in python, like using tqdm + popart_options.compilationProgressLogger = [](int progress, int total) { + if (progress % 10 == 0) { + VLOG(1) << "compile progress: " << progress << "%"; + } + }; } void IpuStrategy::AddBoolOption(const std::string& option, bool value) { @@ -513,6 +522,11 @@ void IpuStrategy::AddCustomOp(const std::string& paddle_op, IpuCustomOpIdentifier(paddle_op, popart_op, domain, version)); } +void IpuStrategy::SetCompilationProgressLogger( + const std::function& logger) { + popart_options.compilationProgressLogger = logger; +} + std::string IpuStrategy::GetOption(const std::string& option) { return get(option, options_getter); } diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.h b/paddle/fluid/platform/device/ipu/ipu_strategy.h index 1802eb16e5895..26566bc18fed0 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.h +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.h @@ -125,6 +125,8 @@ class IpuStrategy { const std::vector &values); void AddCustomOp(const std::string &paddle_op, const std::string &popart_op, const std::string &domain, int version); + void SetCompilationProgressLogger( + const std::function &logger); std::string GetOption(const std::string &); std::vector GetVectorOption(const std::string &); diff --git a/paddle/fluid/platform/device/ipu/ipu_utils.cc b/paddle/fluid/platform/device/ipu/ipu_utils.cc index 720de822608b6..843f3ffde9e45 100644 --- a/paddle/fluid/platform/device/ipu/ipu_utils.cc +++ b/paddle/fluid/platform/device/ipu/ipu_utils.cc @@ -184,27 +184,6 @@ bool GetBoolEnv(std::string 
str) {
   }
 }
 
-std::vector<std::pair<std::string, std::string>> GetOptPrePostfix(
-    const std::string& opt_type) {
-  // format: {popart_tensor_id, paddle_tensor_id}, ...
-  std::vector<std::pair<std::string, std::string>> pre_post_fix;
-
-  if (opt_type == "adam" || opt_type == "lamb") {
-    pre_post_fix.push_back(std::make_pair("", ""));
-    pre_post_fix.push_back(std::make_pair("Accl1___", "_moment1_0"));
-    pre_post_fix.push_back(std::make_pair("Accl2___", "_moment2_0"));
-    pre_post_fix.push_back(std::make_pair("Step___", "_beta1_pow_acc_0"));
-  } else if (opt_type == "sgd" || opt_type == "momentum") {
-    // sgd
-    pre_post_fix.push_back(std::make_pair("", ""));
-  } else {
-    pre_post_fix.push_back(std::make_pair("", ""));
-    //
-  }
-
-  return pre_post_fix;
-}
-
 int RequestIpus(const int num_ipus) {
   // num_ipus must be pow(2, n);
   return std::pow(2, ceil(log2(num_ipus)));
diff --git a/paddle/fluid/platform/device/ipu/ipu_utils.h b/paddle/fluid/platform/device/ipu/ipu_utils.h
index 7644513cc0207..50859aebdb311 100644
--- a/paddle/fluid/platform/device/ipu/ipu_utils.h
+++ b/paddle/fluid/platform/device/ipu/ipu_utils.h
@@ -229,9 +229,6 @@ struct ConstantOpAttrVisitor : public boost::static_visitor<void> {
   void operator()(boost::blank) const { RaiseError(); }
 };
 
-std::vector<std::pair<std::string, std::string>> GetOptPrePostfix(
-    const std::string& opt_type);
-
 int RequestIpus(const int num_ipus);
 
 }  // namespace ipu
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 3a242fe2582a5..5dd06ccad0044 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -4353,7 +4353,10 @@ All parameter, weight, gradient are variables in Paddle.
         for (auto element : opt) {
           auto option_name = element.first.cast<std::string>();
           VLOG(10) << "Set option: " << option_name;
-          if (py::isinstance<py::bool_>(element.second)) {
+          if (option_name == "compilation_progress_logger") {
+            self.SetCompilationProgressLogger(
+                element.second.cast<py::function>());
+          } else if (py::isinstance<py::bool_>(element.second)) {
             self.AddBoolOption(option_name, element.second.cast<bool>());
           } else if (py::isinstance<py::float_>(element.second)) {
             self.AddDoubleOption(option_name,
diff --git a/python/paddle/fluid/tests/unittests/ipu/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ipu/CMakeLists.txt
index 79a2430a16170..4826b37512614 100644
--- a/python/paddle/fluid/tests/unittests/ipu/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/ipu/CMakeLists.txt
@@ -11,4 +11,5 @@ if(WITH_IPU)
   set_tests_properties(test_conv_op_ipu PROPERTIES TIMEOUT 300)
   set_tests_properties(test_elemetwise_x_op_ipu PROPERTIES TIMEOUT 300)
   set_tests_properties(test_reduce_x_op_ipu PROPERTIES TIMEOUT 600)
+  set_tests_properties(test_save_load_ipu PROPERTIES TIMEOUT 600)
 endif()
diff --git a/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py
index debd9ed19827c..45f75f1b4df81 100644
--- a/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py
+++ b/python/paddle/fluid/tests/unittests/ipu/test_ipu_strategy_ipu.py
@@ -73,10 +73,15 @@ def test_set_other_options(self):
             'autoReport.directory': 'path',
             'autoReport.all': 'true'
         }
+        options['random_seed'] = 1234
         for k, v in options.items():
             ipu_strategy.set_options({k: v})
             assert v == ipu_strategy.get_option(k), f"set {k} to {v} failed "
 
+        # The custom logger needs two ints (progress, total) as inputs
+        logger = lambda progress, total: print(f"compile progress: {progress}/{total}")
+        ipu_strategy.set_options({'compilation_progress_logger': logger})
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/ipu/test_save_load_ipu.py 
b/python/paddle/fluid/tests/unittests/ipu/test_save_load_ipu.py index ba6eb4d38bcf2..c8f0961baa480 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_save_load_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_save_load_ipu.py @@ -14,9 +14,11 @@ import tempfile import unittest +from functools import partial import numpy as np import paddle +import paddle.optimizer import paddle.static from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest @@ -28,7 +30,8 @@ def setUp(self): self.set_atol() self.set_data_feed() self.set_feed_attr() - self.set_op_attrs() + self.set_attrs() + self.set_optimizer() def set_data_feed(self): data = np.random.uniform(size=[1, 3, 10, 10]) @@ -39,15 +42,16 @@ def set_feed_attr(self): self.feed_shape = [x.shape for x in self.feed_fp32.values()] self.feed_list = list(self.feed_fp32.keys()) - def set_op_attrs(self): + def set_attrs(self): self.attrs = {} self.attrs['steps'] = 100 self.attrs['save_at_step'] = 20 - self.attrs['is_training'] = True - self.attrs['opt_type'] = 'sgd' self.attrs['enable_fp16'] = False self.attrs['model_path'] = tempfile.TemporaryDirectory() + def set_optimizer(self): + self.optimizer = partial(paddle.optimizer.SGD, learning_rate=1e-1) + def _test_base(self, save_otherwise_load): scope = paddle.static.Scope() main_prog = paddle.static.Program() @@ -71,16 +75,8 @@ def _test_base(self, save_otherwise_load): name='conv2d') loss = paddle.mean(conv1) - if self.attrs['is_training']: - if self.attrs['opt_type'] == 'sgd': - sgd = paddle.optimizer.SGD(learning_rate=1e-2) - sgd.minimize(loss) - elif self.attrs['opt_type'] == 'adam': - adam = paddle.optimizer.Adam(learning_rate=1e-2) - adam.minimize(loss) - elif self.attrs['opt_type'] == 'lamb': - lamb = paddle.optimizer.Lamb(learning_rate=1e-2) - lamb.minimize(loss) + # apply optimizer + self.optimizer().minimize(loss) fetch_list = [loss.name] place = paddle.IPUPlace() @@ -91,8 +87,7 @@ def _test_base(self, save_otherwise_load): paddle.static.load(main_prog, self.attrs['model_path'].name) ipu_strategy = paddle.static.IpuStrategy() - ipu_strategy.set_graph_config( - is_training=self.attrs['is_training']) + ipu_strategy.set_graph_config(is_training=True) ipu_strategy.set_precision_config( enable_fp16=self.attrs['enable_fp16']) ipu_program = paddle.static.IpuCompiledProgram( @@ -131,62 +126,109 @@ def test_base(self): self.attrs['model_path'].cleanup() +class TestMomentum(TestBase): + def set_optimizer(self): + self.optimizer = partial(paddle.optimizer.Momentum, learning_rate=1e-1) + + class TestAdam(TestBase): - def set_op_attrs(self): - self.attrs = {} - self.attrs['steps'] = 100 - self.attrs['save_at_step'] = 20 - self.attrs['is_training'] = True - self.attrs['opt_type'] = 'adam' - self.attrs['enable_fp16'] = False - self.attrs['model_path'] = tempfile.TemporaryDirectory() + def set_optimizer(self): + self.optimizer = partial(paddle.optimizer.Adam, learning_rate=1e-1) class TestLamb(TestBase): - def set_op_attrs(self): - self.attrs = {} - self.attrs['steps'] = 100 - self.attrs['save_at_step'] = 20 - self.attrs['is_training'] = True - self.attrs['opt_type'] = 'lamb' - self.attrs['enable_fp16'] = False - self.attrs['model_path'] = tempfile.TemporaryDirectory() + def set_optimizer(self): + self.optimizer = partial(paddle.optimizer.Lamb, learning_rate=1e-1) + + +class TestAdamW(TestBase): + def set_optimizer(self): + self.optimizer = partial(paddle.optimizer.AdamW, learning_rate=1e-1) + + +class TestAdamax(TestBase): + def set_optimizer(self): + self.optimizer = 
partial(paddle.optimizer.Adamax, learning_rate=1e-1)
+
+
+class TestAdagrad(TestBase):
+    def set_optimizer(self):
+        self.optimizer = partial(paddle.optimizer.Adagrad, learning_rate=1e-1)
+
+
+class TestAdadelta(TestBase):
+    def set_optimizer(self):
+        self.optimizer = partial(paddle.optimizer.Adadelta, learning_rate=1e-1)
+
+
+class TestRMSProp(TestBase):
+    def set_optimizer(self):
+        self.optimizer = partial(paddle.optimizer.RMSProp, learning_rate=1e-1)
+
+
+class TestCenteredRMSProp(TestBase):
+    def set_optimizer(self):
+        self.optimizer = partial(
+            paddle.optimizer.RMSProp, learning_rate=1e-1, centered=True)
 
 
 @unittest.skipIf(IPUOpTest.use_ipumodel(), "skip for ipumodel")
 class TestSGDFP16(TestBase):
-    def set_op_attrs(self):
+    def set_attrs(self):
         self.attrs = {}
         self.attrs['steps'] = 100
         self.attrs['save_at_step'] = 20
-        self.attrs['is_training'] = True
-        self.attrs['opt_type'] = 'sgd'
         self.attrs['enable_fp16'] = True
         self.attrs['model_path'] = tempfile.TemporaryDirectory()
 
+    def set_optimizer(self):
+        self.optimizer = partial(paddle.optimizer.SGD, learning_rate=1e-1)
 
-@unittest.skipIf(IPUOpTest.use_ipumodel(), "skip for ipumodel")
-class TestAdamFP16(TestBase):
-    def set_op_attrs(self):
-        self.attrs = {}
-        self.attrs['steps'] = 100
-        self.attrs['save_at_step'] = 20
-        self.attrs['is_training'] = True
-        self.attrs['opt_type'] = 'adam'
-        self.attrs['enable_fp16'] = True
-        self.attrs['model_path'] = tempfile.TemporaryDirectory()
+class TestMomentumFP16(TestSGDFP16):
+    def set_optimizer(self):
+        self.optimizer = partial(paddle.optimizer.Momentum, learning_rate=1e-1)
 
-@unittest.skipIf(IPUOpTest.use_ipumodel(), "skip for ipumodel")
-class TestLambFP16(TestBase):
-    def set_op_attrs(self):
-        self.attrs = {}
-        self.attrs['steps'] = 100
-        self.attrs['save_at_step'] = 20
-        self.attrs['is_training'] = True
-        self.attrs['opt_type'] = 'lamb'
-        self.attrs['enable_fp16'] = True
-        self.attrs['model_path'] = tempfile.TemporaryDirectory()
+
+class TestAdamFP16(TestSGDFP16):
+    def set_optimizer(self):
+        self.optimizer = partial(paddle.optimizer.Adam, learning_rate=1e-1)
+
+
+class TestLambFP16(TestSGDFP16):
+    def set_optimizer(self):
+        self.optimizer = partial(paddle.optimizer.Lamb, learning_rate=1e-1)
+
+
+class TestAdamWFP16(TestSGDFP16):
+    def set_optimizer(self):
+        self.optimizer = partial(paddle.optimizer.AdamW, learning_rate=1e-1)
+
+
+class TestAdamaxFP16(TestSGDFP16):
+    def set_optimizer(self):
+        self.optimizer = partial(paddle.optimizer.Adamax, learning_rate=1e-1)
+
+
+class TestAdagradFP16(TestSGDFP16):
+    def set_optimizer(self):
+        self.optimizer = partial(paddle.optimizer.Adagrad, learning_rate=1e-1)
+
+
+class TestAdadeltaFP16(TestSGDFP16):
+    def set_optimizer(self):
+        self.optimizer = partial(paddle.optimizer.Adadelta, learning_rate=1e-1)
+
+
+class TestRMSPropFP16(TestSGDFP16):
+    def set_optimizer(self):
+        self.optimizer = partial(paddle.optimizer.RMSProp, learning_rate=1e-1)
+
+
+class TestCenteredRMSPropFP16(TestSGDFP16):
+    def set_optimizer(self):
+        self.optimizer = partial(
+            paddle.optimizer.RMSProp, learning_rate=1e-1, centered=True)
 
 
 if __name__ == "__main__":
diff --git a/tools/dockerfile/Dockerfile.ipu b/tools/dockerfile/Dockerfile.ipu
index 715bd34b908be..08536ae401fe1 100644
--- a/tools/dockerfile/Dockerfile.ipu
+++ b/tools/dockerfile/Dockerfile.ipu
@@ -1,10 +1,10 @@
 # A image for building paddle binaries
 # build docker image
-# docker build -t paddlepaddle/paddle:ipu-dev-2.3.0 -f tools/dockerfile/Dockerfile.ipu .
+# docker build -t paddlepaddle/paddle:latest-dev-ipu -f tools/dockerfile/Dockerfile.ipu . # run a container -# docker run --ulimit memlock=-1:-1 --net=host --cap-add=IPC_LOCK --device=/dev/infiniband/ --ipc=host --rm -it paddlepaddle/paddle:ipu-dev-2.3.0 bash +# docker run --ulimit memlock=-1:-1 --net=host --cap-add=IPC_LOCK --device=/dev/infiniband/ --ipc=host --rm -it paddlepaddle/paddle:latest-dev-ipu bash FROM graphcore/poplar:2.3.0 MAINTAINER PaddlePaddle Authors
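Usage note: the new `compilation_progress_logger` option accepts any Python callable taking two ints (progress, total), as exercised in test_ipu_strategy_ipu.py above. Below is a minimal sketch of driving a progress bar from that callback; the tqdm wiring is illustrative only and assumes tqdm is installed, it is not part of this change.

# Hypothetical example: report IPU compilation progress through a tqdm bar.
import paddle.static
from tqdm import tqdm

bar = tqdm(total=100, desc="IPU compilation")

def log_progress(progress, total):
    # The callback receives (progress, total) from popart; mirror them onto the bar.
    bar.total = total
    bar.n = progress
    bar.refresh()

ipu_strategy = paddle.static.IpuStrategy()
ipu_strategy.set_options({'compilation_progress_logger': log_progress})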