Commit

add batches_per_step(python side) (PaddlePaddle#90)
* add SetIpuIndexStage for model sharding/pipelining

* add batches_per_step
XBWGC authored Aug 25, 2021
1 parent ac5115a commit a871ac1
Showing 6 changed files with 34 additions and 15 deletions.
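
A hedged sketch of the intended Python-side usage: the three properties below are exactly the ones this commit binds in paddle/fluid/pybind/pybind.cc; the paddle.fluid.core import path for IpuStrategy is an assumption, since the commit only shows the strategy struct, the bindings, and part of python/paddle/fluid/compiler.py.

# Usage sketch; the core.IpuStrategy constructor path is an assumption,
# only the three properties below come from this commit.
from paddle.fluid import core

ipu_strategy = core.IpuStrategy()
ipu_strategy.num_ipus = 2            # number of IPUs to use, default 1
ipu_strategy.batches_per_step = 4    # new in this commit, default 1
ipu_strategy.is_training = False     # True for training, False for inference
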
10 changes: 5 additions & 5 deletions paddle/fluid/framework/ipu/ipu_backend.cc
@@ -105,14 +105,14 @@ void IpuBackend::Prepare() {
for (popart::TensorId item : compiler_->GetOutputs()) {
anchor_ids.push_back(item);
}
- auto dataFlow = popart::DataFlow(1, anchor_ids);
+ auto dataFlow = popart::DataFlow(ipu_strategy_->batches_per_step, anchor_ids);

PADDLE_ENFORCE_NOT_NULL(
curr_device_,
platform::errors::Unavailable("IPU device isn't attached, please call "
"IpuBackend::AttachDevice(id) first."));

- if (ipu_strategy_ != nullptr && ipu_strategy_->is_training_) {
+ if (ipu_strategy_ != nullptr && ipu_strategy_->is_training) {
VLOG(1) << "Creating TrainingSession from Onnx Model...";
auto popart_optimizer = GetPopartOptimizer();
auto tensors = compiler_->GetTensors();
@@ -170,7 +170,7 @@ void IpuBackend::Run(const std::vector<const Tensor*>& inputs,
popart_anchors.emplace(tensor_id, anchor_wrappers.at(tensor_id));
}

- if (ipu_strategy_ != nullptr && ipu_strategy_->is_training_) {
+ if (ipu_strategy_ != nullptr && ipu_strategy_->is_training) {
VLOG(1) << "Update optimizer learning rate...";
auto popart_optimizer = GetPopartOptimizer();
auto session = dynamic_cast<popart::TrainingSession*>(session_.get());
@@ -197,12 +197,12 @@ float IpuBackend::GetLRFromScope() {

// ipu_num_ must be pow(2,n);
int IpuBackend::UpperIpuNum() {
- PADDLE_ENFORCE_GT(ipu_strategy_->num_ipus_, 0,
+ PADDLE_ENFORCE_GT(ipu_strategy_->num_ipus, 0,
platform::errors::Unavailable(
"The ipu num get is wrong, please make sure the "
"sharding or pipline parameter is right."));
int i = 0;
- while (pow(2, i) < ipu_strategy_->num_ipus_) {
+ while (pow(2, i) < ipu_strategy_->num_ipus) {
i++;
}
return pow(2, i);
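
UpperIpuNum() above rounds the configured num_ipus up to the next power of two, since the IPU count used for sharding/pipelining must be pow(2, n). An equivalent Python sketch of that rounding logic, for illustration only (not part of the commit):

# Round a positive IPU count up to the next power of two, mirroring
# IpuBackend::UpperIpuNum() shown above.
def upper_ipu_num(num_ipus):
    assert num_ipus > 0, "sharding/pipeline parameters must give a positive ipu num"
    i = 0
    while 2 ** i < num_ipus:
        i += 1
    return 2 ** i

assert [upper_ipu_num(n) for n in (1, 2, 3, 5, 8)] == [1, 2, 4, 8, 8]
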
5 changes: 3 additions & 2 deletions paddle/fluid/framework/ipu/ipu_strategy.h
@@ -37,8 +37,9 @@ namespace ipu {
using VirtualGraphMode = popart::VirtualGraphMode;

struct IpuStrategy {
- int num_ipus_ = 1;
- bool is_training_ = true;
+ int num_ipus = 1;
+ int batches_per_step = 1;
+ bool is_training = true;
popart::SessionOptions popart_options_;
};

2 changes: 1 addition & 1 deletion paddle/fluid/framework/ipu/supported_ops_custom.h
@@ -32,7 +32,7 @@
auto inputs = GetOpInputs(op_desc); \
auto outputs = op_desc->Output("__outputs__"); \
/*num_outputs training mode 5, inference mode 1*/ \
- auto num_outputs = ipu_strategy_->is_training_ ? 5 : 1; \
+ auto num_outputs = ipu_strategy_->is_training ? 5 : 1; \
auto epsilon = BOOST_GET_CONST(float, op_desc->GetAttr("epsilon")); \
auto momentum = BOOST_GET_CONST(float, op_desc->GetAttr("momentum")); \
auto result = builder_->aiOnnxOpset11().batchnormalization( \
4 changes: 2 additions & 2 deletions paddle/fluid/framework/ir/ipu/inference_extract_pass.cc
@@ -48,8 +48,8 @@ void InferenceExtractPass::ApplyImpl(ir::Graph* graph) const {
static std::shared_ptr<ipu::IpuStrategy> ipu_strategy_instance_(
new ipu::IpuStrategy());

- ipu_strategy_instance_->is_training_ = false;
- ipu_strategy_instance_->num_ipus_ = graph->Get<int>("num_ipus");
+ ipu_strategy_instance_->is_training = false;
+ ipu_strategy_instance_->num_ipus = graph->Get<int>("num_ipus");
ipu_strategy_instance_->popart_options_.enablePipelining =
graph->Get<bool>("enable_pipeline");
auto& enable_sharding = graph->Get<bool>("enable_sharding");
17 changes: 13 additions & 4 deletions paddle/fluid/pybind/pybind.cc
@@ -3204,18 +3204,27 @@ All parameter, weight, gradient are variables in Paddle.
.def(py::init())
.def_property(
"num_ipus",
- [](const ipu::IpuStrategy &self) { return self.num_ipus_; },
+ [](const ipu::IpuStrategy &self) { return self.num_ipus; },
[](ipu::IpuStrategy &self, int num_ipus) {
- self.num_ipus_ = num_ipus;
+ self.num_ipus = num_ipus;
},
R"DOC(
Int type, set the number of IPUs we need. Default 1.
)DOC")
+ .def_property(
+ "batches_per_step",
+ [](const ipu::IpuStrategy &self) { return self.batches_per_step; },
+ [](ipu::IpuStrategy &self, int batches_per_step) {
+ self.batches_per_step = batches_per_step;
+ },
+ R"DOC(
+ Int type, set batches_per_step. Default 1.
+ )DOC")
.def_property(
"is_training",
- [](const ipu::IpuStrategy &self) { return self.is_training_; },
+ [](const ipu::IpuStrategy &self) { return self.is_training; },
[](ipu::IpuStrategy &self, bool is_training) {
- self.is_training_ = is_training;
+ self.is_training = is_training;
},
R"DOC(
Bool type, True for training, False for inference. Default True.
11 changes: 10 additions & 1 deletion python/paddle/fluid/compiler.py
@@ -526,7 +526,7 @@ def __init__(self, program, scope=None, ipu_strategy=None):
"popart_canonicalization_pass"
]

- def compile(self, feed_list, fetch_list, scope=None):
+ def compile(self, feed_list, fetch_list, feed_var_name='feed', scope=None):
for pass_name in self._graph_passes:
graph_pass = core.get_pass(pass_name)
graph_pass.apply(self._graph)
@@ -557,6 +557,15 @@ def compile(self, feed_list, fetch_list, scope=None):
global_block = self._program.global_block()
program.lr_sheduler.lr_var = global_block.vars[lr_var_name]

+ # With popart we need to support batches_per_step, which means the shape of
+ # a feed_var and of the fed tensor (e.g. a numpy array) may not match, so we
+ # set need_check_feed to False. This way we avoid modifying the run logic.
+ program_global_block = program.global_block()
+ for feed_name in feed_list:
+ feed_var = program_global_block.var(feed_name)
+ feed_var.desc.set_need_check_feed(False)

return program
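
Why need_check_feed is switched off, as an illustrative sketch (the shapes below are hypothetical, not from the commit): with batches_per_step greater than one, popart consumes batches_per_step batches per session run, so the host array fed for a variable carries an extra leading batches_per_step dimension and no longer matches the shape recorded in the program desc; skipping the check avoids changing the executor's run logic.

# Illustration of the shape mismatch that the set_need_check_feed(False) loop
# above works around; shapes here are made up.
import numpy as np

batches_per_step, batch_size = 4, 8
declared_shape = (batch_size, 3, 224, 224)   # shape recorded for the feed var
fed = np.zeros((batches_per_step,) + declared_shape, dtype=np.float32)
assert fed.shape != declared_shape           # would fail a strict feed-shape check
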


