[Cherry-pick] Fix indexing shape bug and Optimize (#62117)
* tensor_array slice in PIR (#60503)

* use slice_array; currently hits an error where a destroyed OpResult is still in use

* disable the PIR test until the bug is fixed

* Optimize advanced setting by removing the last set_value (#60771)

* pure advanced-indexing setitem no longer writes back with set_value

* fix multi output in tensor_array_pir

* only in dynamic mode

* add an advanced-setting-only case to fix coverage

* fast pass for bool setitem (#61021)

* fast pass for bool setitem

* fix 0-size value case

* remove final set_value OP in combined-indexing setting (#60983)

* remove set_value in combined-indexing setting

* use a combined-setting case to test

* Optimize index put preprocess (#61060)

* reduce vector operations when no bool index

* reduce vector in index_put

* reduce vector operations

* no change for value

* fix shape error in combine-getitem (#61922)
zoooo0820 authored Feb 28, 2024
1 parent f4d9adf commit 609f55e
Showing 7 changed files with 335 additions and 183 deletions.
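The changes below touch Paddle's dygraph __getitem__/__setitem__ bindings (eager_method.cc) and the shared slice helpers (slice_utils.h). As a minimal sketch of the Python-level indexing patterns they target, assuming only the public paddle API (tensor names, shapes, and values are illustrative):

import paddle

x = paddle.arange(24, dtype='float32').reshape([2, 3, 4])

# Pure advanced-indexing setitem: written in place by index_put_, so the
# trailing set_value write-back is no longer needed (#60771).
x[[0, 1], [2, 0]] = 1.0

# Bool-mask setitem where the mask has the same rank as the tensor: the
# fast pass (#61021) lowers this to where_(!mask, x, value).
mask = x > 10.0
x[mask] = 0.0

# Combined basic + advanced indexing: a basic-index view is taken first and
# then modified in place; out_is_view lets the backward edge be recorded
# without a final set_value OP (#60983).
x[0, [1, 2]] = paddle.zeros([2, 4])

# Combined getitem; expected result shape here is [2, 4].
y = x[0, [1, 2]]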
183 changes: 125 additions & 58 deletions paddle/fluid/pybind/eager_method.cc
@@ -54,6 +54,7 @@ typedef SSIZE_T ssize_t;
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#include "paddle/common/ddim.h"
#include "paddle/fluid/eager/amp_utils.h"
#include "paddle/fluid/eager/api/generated/eager_generated/backwards/nodes.h"
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/eager_amp_auto_cast.h"
#include "paddle/fluid/framework/python_headers.h"
@@ -1359,6 +1360,7 @@ static PyObject* tensor__getitem_dygraph(TensorObject* self,
&use_strided_slice);

// step2: Dealing with basic indexing
bool out_is_view = false;
auto out = getTensorWithBasicIndexing(tensor,
&slice_axes,
&slice_starts,
@@ -1367,7 +1369,8 @@ static PyObject* tensor__getitem_dygraph(TensorObject* self,
&decrease_axis,
&none_axes,
&infer_flags,
&use_strided_slice);
&use_strided_slice,
&out_is_view);

if (!has_advanced_index) {
return ToPyObject(out);
@@ -1386,7 +1389,8 @@ static PyObject* tensor__getitem_dygraph(TensorObject* self,
&trans_back_dim,
&pos_of_new_dim,
&rank_of_new_dim,
&trans_dim);
&trans_dim,
&out_is_view);

if (transed_index.size() == 1 &&
transed_index[0].dtype() == phi::DataType::BOOL) {
Expand Down Expand Up @@ -1416,14 +1420,14 @@ static PyObject* tensor__getitem_dygraph(TensorObject* self,

if (pos_of_new_dim != 0) {
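// The advanced-index result currently has the index-produced dims at the
// front; build a permutation that moves them so they start at
// pos_of_new_dim. For example, with out.shape().size() == 4,
// pos_of_new_dim == 1 and rank_of_new_dim == 2, the loop below yields
// perm = {2, 0, 1, 3}.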
std::vector<int> perm(out.shape().size(), 0);
int tmp1 = pos_of_new_dim, tmp2 = 0,
int tmp1 = rank_of_new_dim, tmp2 = 0,
tmp3 = pos_of_new_dim + rank_of_new_dim;
for (int i = 0; i < static_cast<int>(out.shape().size()); ++i) {
if (i < rank_of_new_dim) {
if (i < pos_of_new_dim) {
perm[i] =
tmp1++; // range(pos_of_new_dim, pos_of_new_dim + rank_of_new_dim)
} else if (i >= rank_of_new_dim && i < pos_of_new_dim + rank_of_new_dim) {
perm[i] = tmp2++; // range(0, pos_of_new_dim)
tmp1++; // range(rank_of_new_dim, pos_of_new_dim + rank_of_new_dim)
} else if (i >= pos_of_new_dim && i < pos_of_new_dim + rank_of_new_dim) {
perm[i] = tmp2++; // range(0, rank_of_new_dim)
} else {
perm[i] = tmp3++; // range(pos_of_new_dim + rank_of_new_dim, out.ndim)
}
@@ -1681,6 +1685,7 @@ static PyObject* tensor__setitem_dygraph(TensorObject* self,
// 3. assign values to the sliced result by index_put OP;
// 4. transpose back and assign the result to original tensor by set_value
// OP.
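// With this change, steps 3-4 happen in place: the sliced result is
// modified by the inplace index_put_ (or by a where_ fast pass for a
// full-rank bool mask), and when out_is_view is set a
// set_value_with_tensor_grad node is recorded manually for the backward
// edge instead of running a final set_value OP.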
bool out_is_view = false;
paddle::Tensor sub_tensor = getTensorWithBasicIndexing(tensor,
&slice_axes,
&slice_starts,
@@ -1689,7 +1694,8 @@ static PyObject* tensor__setitem_dygraph(TensorObject* self,
&decrease_axis,
&none_axes,
&infer_flags,
&use_strided_slice);
&use_strided_slice,
&out_is_view);

std::vector<paddle::Tensor> transed_index;
std::vector<int> trans_back_dim, trans_dim;
@@ -1705,65 +1711,126 @@ static PyObject* tensor__setitem_dygraph(TensorObject* self,
&trans_back_dim,
&pos_of_new_dim,
&rank_of_new_dim,
&trans_dim);
&trans_dim,
&out_is_view);

// Release gil and do tracing
py::gil_scoped_release release;

if (value_tensor.initialized() &&
(self->tensor.dtype() != value_tensor.dtype())) {
if (egr::Controller::Instance().GetAMPLevel() !=
paddle::imperative::AmpLevel::O0) {
paddle::small_vector<std::vector<paddle::Tensor>,
egr::kSlotSmallVectorSize>
tmps = {{self->tensor}, {value_tensor}};
auto amp_dtype = egr::GetAmpDestDtype("index_put", tmps);
self->tensor = egr::EagerAmpAutoCast(
self->tensor.name(), self->tensor, amp_dtype, "index_put");
value_tensor = egr::EagerAmpAutoCast(
value_tensor.name(), value_tensor, amp_dtype, "index_put");
}
if (value_tensor.initialized()) {
if (self->tensor.dtype() != value_tensor.dtype()) {
value_tensor = cast_ad_func(value_tensor, self->tensor.dtype());
if (egr::Controller::Instance().GetAMPLevel() !=
paddle::imperative::AmpLevel::O0) {
paddle::small_vector<std::vector<paddle::Tensor>,
egr::kSlotSmallVectorSize>
tmps = {{self->tensor}, {value_tensor}};
auto amp_dtype = egr::GetAmpDestDtype("index_put", tmps);
self->tensor = egr::EagerAmpAutoCast(
self->tensor.name(), self->tensor, amp_dtype, "index_put");
value_tensor = egr::EagerAmpAutoCast(
value_tensor.name(), value_tensor, amp_dtype, "index_put");
}
if (self->tensor.dtype() != value_tensor.dtype()) {
value_tensor = cast_ad_func(value_tensor, self->tensor.dtype());
}
}
}

if (value_tensor.dims().size() > 1 && pos_of_new_dim != 0) {
value_tensor = transpose_ad_func(value_tensor, trans_dim);
}
if (value_tensor.dims().size() > 1 && pos_of_new_dim != 0) {
value_tensor = transpose_ad_func(value_tensor, trans_dim);
}

// TODO(zoooo0820) 1.Using inplace version index_put
// 2.Remove following code after backward bug fixed.
transed_sub_tensor = assign_ad_func(transed_sub_tensor);
const phi::distributed::ProcessMesh* mesh = nullptr;
if (InputsContainDistTensor(
&mesh, self->tensor, transed_sub_tensor, value_tensor)) {
ConvertAllInputsToDistTensor(
mesh, self->tensor, transed_sub_tensor, value_tensor);
}

const phi::distributed::ProcessMesh* mesh = nullptr;
if (InputsContainDistTensor(
&mesh, self->tensor, transed_sub_tensor, value_tensor)) {
ConvertAllInputsToDistTensor(
mesh, self->tensor, transed_sub_tensor, value_tensor);
}
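// Fast pass for bool setitem: when the single advanced index is a bool
// mask with the same rank as the tensor, the assignment is expressed as
// where_(!mask, x, value) on the whole tensor (value is expanded to x's
// shape first) instead of going through index_put_.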
if (transed_index.size() == 1 &&
transed_index[0].dtype() == phi::DataType::BOOL &&
transed_index[0].shape().size() == self->tensor.shape().size()) {
if (value_tensor.shape() != self->tensor.shape()) {
value_tensor = expand_ad_func(value_tensor, self->tensor.shape());
}
transed_sub_tensor =
where__ad_func(logical_not_ad_func(transed_index[0]),
transed_sub_tensor,
value_tensor);
} else {
transed_sub_tensor =
index_put__ad_func(transed_sub_tensor, transed_index, value_tensor);
}

transed_sub_tensor =
index_put_ad_func(transed_sub_tensor, transed_index, value_tensor);

paddle::Tensor transback_sub_tensor =
transpose_ad_func(transed_sub_tensor, trans_back_dim);

self->tensor = set_value_with_tensor__ad_func(self->tensor,
transback_sub_tensor,
slice_starts,
slice_ends,
slice_strides,
slice_axes,
decrease_axis,
none_axes);
if (PyCheckTensor(value_obj)) {
// Pass the stop_gradient from value to tensor; this should be done
// after CheckInplace in set_value__dygraph_function.
if (!egr::EagerUtils::autograd_meta(&value_tensor)->StopGradient() &&
egr::EagerUtils::autograd_meta(&self->tensor)->StopGradient()) {
egr::EagerUtils::autograd_meta(&self->tensor)->SetStopGradient(false);
if (out_is_view) {
// NOTE(zoooo0820): if out_is_view is true, this is a combined-indexing
// setitem: we first take a view of self->tensor and then modify it with
// the inplace API index_put_. In Paddle's current design the forward
// result is correct, but the backward edge cannot be established because
// the base Tensor cannot tell that it has been modified by other
// operations. The following code adds a new node
// (set_value_with_tensor_grad) to record the backward edge, without going
// through the ad_function, which would also do the forward calculation.

egr::AutogradMeta* x_autograd_meta =
egr::EagerUtils::nullable_autograd_meta(self->tensor);
egr::AutogradMeta* values_autograd_meta =
egr::EagerUtils::nullable_autograd_meta(transed_sub_tensor);
bool trace_backward = egr::Controller::Instance().HasGrad();
bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(
trace_backward, x_autograd_meta, values_autograd_meta);
// Node Declaration
std::shared_ptr<SetValueWithTensorGradNode> grad_node;
// Set grad_node before API Call
if (require_any_grad) {
paddle::Tensor transback_sub_tensor =
transpose_ad_func(transed_sub_tensor, trans_back_dim);
const auto& values_tmp =
(require_any_grad && transback_sub_tensor.is_dense_tensor() &&
!std::dynamic_pointer_cast<phi::DenseTensor>(
transback_sub_tensor.impl())
->meta()
.is_contiguous())
? paddle::Tensor(
std::make_shared<phi::DenseTensor>(
std::move(paddle::experimental::Trans2Contiguous(
*(std::dynamic_pointer_cast<phi::DenseTensor>(
transback_sub_tensor.impl()))))),
transback_sub_tensor.mutable_autograd_meta())
: transback_sub_tensor;

grad_node = std::shared_ptr<SetValueWithTensorGradNode>(
new SetValueWithTensorGradNode(1, 2)); // NOLINT
grad_node->SetAttributestarts(slice_starts);
grad_node->SetAttributeends(slice_ends);
grad_node->SetAttributesteps(slice_strides);
grad_node->SetAttributeaxes(slice_axes);
grad_node->SetAttributedecrease_axes(decrease_axis);
grad_node->SetAttributenone_axes(none_axes);
grad_node->SetTensorWrappervalues(values_tmp);

paddle::memory::LogDeviceMemoryStats(
egr::Controller::Instance().GetExpectedPlace(),
"set_value_with_tensor");
egr::EagerUtils::CheckInplace(
self->tensor, x_autograd_meta, require_any_grad);
egr::EagerUtils::PassStopGradient(false, x_autograd_meta);
// SetGradOutMeta & SetEdges
grad_node->SetGradOutMeta(self->tensor, 0);
grad_node->SetGradOutMeta(transback_sub_tensor, 1);
if (x_autograd_meta) {
egr::EagerUtils::SetOutRankWithSlot(x_autograd_meta, 0);
egr::EagerUtils::SetHistory(x_autograd_meta, grad_node);
}
grad_node->SetGradInMeta(self->tensor, 0);
}
}
if (PyCheckTensor(value_obj)) {
// Pass the stop_gradient from value to tensor; this should be done
// after CheckInplace in set_value__dygraph_function.
if (!egr::EagerUtils::autograd_meta(&value_tensor)->StopGradient() &&
egr::EagerUtils::autograd_meta(&self->tensor)->StopGradient()) {
egr::EagerUtils::autograd_meta(&self->tensor)->SetStopGradient(false);
}
}
}
}
9 changes: 7 additions & 2 deletions paddle/fluid/pybind/slice_utils.h
@@ -347,11 +347,13 @@ static paddle::Tensor getTensorWithBasicIndexing(
std::vector<int64_t>* decrease_axis,
std::vector<int64_t>* none_axes,
std::vector<int64_t>* infer_flags,
bool* use_strided_slice) {
bool* use_strided_slice,
bool* out_is_view) {
paddle::Tensor out;
if (slice_axes->empty()) {
out = tensor;
} else {
*out_is_view = true;
if (!(*use_strided_slice)) {
eager_gil_scoped_release guard;
out = slice_ad_func(tensor,
@@ -372,6 +374,7 @@ static paddle::Tensor getTensorWithBasicIndexing(
}
}
if (!none_axes->empty()) {
*out_is_view = true;
eager_gil_scoped_release guard;
// Deal with cases that decrease_axes is not empty
// For example:
@@ -400,7 +403,8 @@ static paddle::Tensor dealWithAdvancedIndex(
std::vector<int>* trans_back_dim,
int* pos_of_new_dim,
int* rank_of_new_dim,
std::vector<int>* trans_dim) {
std::vector<int>* trans_dim,
bool* out_is_view) {
int p = 0;
for (size_t i = 0; i < advanced_index_dim->size(); ++i) {
auto index_dim = (*advanced_index_dim)[i];
@@ -443,6 +447,7 @@
if (original_dim_order == *trans_dim) {
transed_tensor = tensor;
} else {
*out_is_view = true;
transed_tensor = transpose_ad_func(tensor, *trans_dim);
}
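// out_is_view tells the caller that the returned tensor is derived from
// the original tensor (a basic-index slice, a none_axes adjustment, or the
// transpose above) rather than being the tensor itself; the
// combined-indexing setitem path uses this to record the backward edge
// explicitly (see the NOTE in eager_method.cc).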

