From 39e92b9fb27c94a5d7519b00db7196489541246e Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Wed, 21 Mar 2018 11:32:23 +0800
Subject: [PATCH 1/2] Temporarily fix bug for backward transpiler when using
 parallel_do operator.

---
 python/paddle/fluid/backward.py         | 16 ++++++++++++++--
 python/paddle/fluid/layers/detection.py |  4 ++--
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py
index 7af6ed1463ab7..8deb93a9e684b 100644
--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -307,16 +307,28 @@ def _append_backward_ops_(block,
         sub_block = program.block(op.block_attr("sub_block"))
         grad_sub_block = program.create_block()
         grad_sub_block.set_forward_block_idx(sub_block.idx)
+
+        all_vars = op.block.vars
+        target_vars = [all_vars[name] for name in op.output_arg_names]
+        no_grad_set = copy.copy(no_grad_dict[sub_block.idx])
+        new_no_grad_dict = _get_stop_gradients_(sub_block.program)
+        new_no_grad_dict[0].update(map(_append_grad_suffix_, no_grad_set))
+        block_no_grad_set = set(
+            map(_strip_grad_suffix_, new_no_grad_dict[0]))
+        op_path = _find_op_path_(sub_block, target_vars, [],
+                                 block_no_grad_set)
+        no_grad_dict[0].update(map(_append_grad_suffix_, block_no_grad_set))
+
         cb = _callback_lookup_(op)
         if cb is not None:
             if callbacks is None:
                 new_callbacks = [cb]
             else:
                 new_callbacks = callbacks + [_callback_lookup_(op)]
-            _append_backward_ops_(sub_block, sub_block.ops, grad_sub_block,
+            _append_backward_ops_(sub_block, op_path, grad_sub_block,
                                   no_grad_dict, grad_to_var, new_callbacks)
         else:
-            _append_backward_ops_(sub_block, sub_block.ops, grad_sub_block,
+            _append_backward_ops_(sub_block, op_path, grad_sub_block,
                                   no_grad_dict, grad_to_var, callbacks)
 
         program.rollback()
diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py
index a889ab6bdc6ac..5e131737749ec 100644
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -129,13 +129,11 @@ class number, M is number of bounding boxes. For each category
         prior_box_var=prior_box_var,
         target_box=loc,
         code_type='decode_center_size')
-
     old_shape = scores.shape
     scores = ops.reshape(x=scores, shape=(-1, old_shape[-1]))
     scores = nn.softmax(input=scores)
     scores = ops.reshape(x=scores, shape=old_shape)
     scores = nn.transpose(scores, perm=[0, 2, 1])
-
     nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype)
     helper.append_op(
         type="multiclass_nms",
@@ -695,6 +693,8 @@ def _prior_box_(input,
             outputs={"Boxes": box,
                      "Variances": var},
             attrs=attrs, )
+        box.stop_gradient = True
+        var.stop_gradient = True
         return box, var
 
     def _reshape_with_axis_(input, axis=1):

From a7dad506f1eee85fdf290740868c9b7d67867c8e Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Thu, 22 Mar 2018 15:14:47 +0800
Subject: [PATCH 2/2] Fix bug for backward transpiler when using parallel_do
 operator

---
 paddle/fluid/operators/box_coder_op.cc          |  3 ++-
 paddle/fluid/operators/detection_map_op.cc      |  4 ++--
 paddle/fluid/operators/iou_similarity_op.cc     |  5 +++--
 paddle/fluid/operators/mine_hard_examples_op.cc |  5 +++--
 paddle/fluid/operators/prior_box_op.cc          |  4 +++-
 paddle/fluid/operators/target_assign_op.cc      |  4 ++--
 python/paddle/fluid/backward.py                 | 16 ++--------------
 python/paddle/fluid/layers/detection.py         |  3 +++
 8 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/paddle/fluid/operators/box_coder_op.cc b/paddle/fluid/operators/box_coder_op.cc
index eccdd408a17a0..ec416f725e75f 100644
--- a/paddle/fluid/operators/box_coder_op.cc
+++ b/paddle/fluid/operators/box_coder_op.cc
@@ -126,6 +126,7 @@ width and height.
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(box_coder, ops::BoxCoderOp, ops::BoxCoderOpMaker);
+REGISTER_OPERATOR(box_coder, ops::BoxCoderOp, ops::BoxCoderOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
 REGISTER_OP_CPU_KERNEL(box_coder, ops::BoxCoderKernel<float>,
                        ops::BoxCoderKernel<double>);
diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc
index 73c84c2fe0155..93ef15b933216 100644
--- a/paddle/fluid/operators/detection_map_op.cc
+++ b/paddle/fluid/operators/detection_map_op.cc
@@ -188,8 +188,8 @@ The general steps are as follows. First, calculate the true positive and
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(detection_map, ops::DetectionMAPOp,
-                             ops::DetectionMAPOpMaker);
+REGISTER_OPERATOR(detection_map, ops::DetectionMAPOp, ops::DetectionMAPOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
 REGISTER_OP_CPU_KERNEL(
     detection_map, ops::DetectionMAPOpKernel<float>,
     ops::DetectionMAPOpKernel<double>);
diff --git a/paddle/fluid/operators/iou_similarity_op.cc b/paddle/fluid/operators/iou_similarity_op.cc
index ffbd7c7814c3f..4b78ec510d1fb 100755
--- a/paddle/fluid/operators/iou_similarity_op.cc
+++ b/paddle/fluid/operators/iou_similarity_op.cc
@@ -87,8 +87,9 @@ IOU(A, B) =
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(iou_similarity, ops::IOUSimilarityOp,
-                             ops::IOUSimilarityOpMaker);
+REGISTER_OPERATOR(iou_similarity, ops::IOUSimilarityOp,
+                  ops::IOUSimilarityOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
 
 REGISTER_OP_CPU_KERNEL(
     iou_similarity,
diff --git a/paddle/fluid/operators/mine_hard_examples_op.cc b/paddle/fluid/operators/mine_hard_examples_op.cc
index 0e81d60878dce..277901cff4934 100644
--- a/paddle/fluid/operators/mine_hard_examples_op.cc
+++ b/paddle/fluid/operators/mine_hard_examples_op.cc
@@ -324,8 +324,9 @@ MatchIndices elements with value -1.
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(mine_hard_examples, ops::MineHardExamplesOp,
-                             ops::MineHardExamplesOpMaker);
+REGISTER_OPERATOR(mine_hard_examples, ops::MineHardExamplesOp,
+                  ops::MineHardExamplesOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
 
 REGISTER_OP_CPU_KERNEL(
     mine_hard_examples,
diff --git a/paddle/fluid/operators/prior_box_op.cc b/paddle/fluid/operators/prior_box_op.cc
index 7ba55437cb20f..c22a55bce2634 100644
--- a/paddle/fluid/operators/prior_box_op.cc
+++ b/paddle/fluid/operators/prior_box_op.cc
@@ -168,7 +168,9 @@ Please get more information from the following papers:
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(prior_box, ops::PriorBoxOp, ops::PriorBoxOpMaker);
+REGISTER_OPERATOR(prior_box, ops::PriorBoxOp, ops::PriorBoxOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
+
 REGISTER_OP_CPU_KERNEL(
     prior_box, ops::PriorBoxOpKernel<float>,
     ops::PriorBoxOpKernel<double>);
diff --git a/paddle/fluid/operators/target_assign_op.cc b/paddle/fluid/operators/target_assign_op.cc
index a894b12fa35a1..33ff967e5e8f5 100644
--- a/paddle/fluid/operators/target_assign_op.cc
+++ b/paddle/fluid/operators/target_assign_op.cc
@@ -153,8 +153,8 @@ template struct NegTargetAssignFunctor,
diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py
index 8deb93a9e684b..7af6ed1463ab7 100644
--- a/python/paddle/fluid/backward.py
+++ b/python/paddle/fluid/backward.py
@@ -307,28 +307,16 @@ def _append_backward_ops_(block,
         sub_block = program.block(op.block_attr("sub_block"))
         grad_sub_block = program.create_block()
         grad_sub_block.set_forward_block_idx(sub_block.idx)
-
-        all_vars = op.block.vars
-        target_vars = [all_vars[name] for name in op.output_arg_names]
-        no_grad_set = copy.copy(no_grad_dict[sub_block.idx])
-        new_no_grad_dict = _get_stop_gradients_(sub_block.program)
-        new_no_grad_dict[0].update(map(_append_grad_suffix_, no_grad_set))
-        block_no_grad_set = set(
-            map(_strip_grad_suffix_, new_no_grad_dict[0]))
-        op_path = _find_op_path_(sub_block, target_vars, [],
-                                 block_no_grad_set)
-        no_grad_dict[0].update(map(_append_grad_suffix_, block_no_grad_set))
-
         cb = _callback_lookup_(op)
         if cb is not None:
             if callbacks is None:
                 new_callbacks = [cb]
             else:
                 new_callbacks = callbacks + [_callback_lookup_(op)]
-            _append_backward_ops_(sub_block, op_path, grad_sub_block,
+            _append_backward_ops_(sub_block, sub_block.ops, grad_sub_block,
                                   no_grad_dict, grad_to_var, new_callbacks)
         else:
-            _append_backward_ops_(sub_block, op_path, grad_sub_block,
+            _append_backward_ops_(sub_block, sub_block.ops, grad_sub_block,
                                   no_grad_dict, grad_to_var, callbacks)
 
         program.rollback()
diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py
index 5e131737749ec..cd519e1ee082d 100644
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -473,6 +473,7 @@ def __reshape_to_2d(var):
     # 2. Compute confidence for mining hard examples
     # 2.1. Get the target label based on matched indices
     gt_label = ops.reshape(x=gt_label, shape=gt_label.shape + (1, ))
+    gt_label.stop_gradient = True
     target_label, _ = target_assign(
         gt_label, matched_indices, mismatch_value=background_label)
     # 2.2. Compute confidence loss.
@@ -480,10 +481,12 @@ def __reshape_to_2d(var):
     confidence = __reshape_to_2d(confidence)
     target_label = tensor.cast(x=target_label, dtype='int64')
     target_label = __reshape_to_2d(target_label)
+    target_label.stop_gradient = True
     conf_loss = nn.softmax_with_cross_entropy(confidence, target_label)
 
     # 3. Mining hard examples
     conf_loss = ops.reshape(x=conf_loss, shape=(num, num_prior))
+    conf_loss.stop_gradient = True
    neg_indices = helper.create_tmp_variable(dtype='int32')
     dtype = matched_indices.dtype
     updated_matched_indices = helper.create_tmp_variable(dtype=dtype)
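
Usage note (appended after the patches, not part of them): the second patch registers the detection operators with EmptyGradOpMaker and marks auxiliary variables (prior boxes, gt_label, target_label, conf_loss) with stop_gradient = True, so the backward pass, and the transpilers that rewrite it for parallel_do, do not try to create gradient ops or grad variables for them. A minimal sketch of that pattern, assuming the fluid Python API of this era; the layer names, shapes, and variable names below are illustrative only:

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    fc = fluid.layers.fc(input=x, size=2, act='softmax')

    # Target-like tensors should never receive gradients; flagging this
    # explicitly mirrors what the patch does for gt_label, target_label
    # and conf_loss in ssd_loss.
    label.stop_gradient = True

    loss = fluid.layers.cross_entropy(input=fc, label=label)
    avg_loss = fluid.layers.mean(x=loss)

    # append_backward skips stop_gradient variables when it builds grad ops,
    # so only the fc parameters receive gradients here.
    param_grads = fluid.backward.append_backward(loss=avg_loss)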