Support feed op new ir #54840

Merged: 14 commits, Jun 26, 2023
@@ -952,8 +952,8 @@ void BuildOpFuncList(

auto op_name = attr_map.at("op_name").dyn_cast<::ir::StrAttribute>().data();

if (op_name == "builtin.combine") {
VLOG(6) << "skip process pd.fetch op";
if (op_name == "builtin.combine" || op_name == "pd.feed") {
VLOG(6) << "skip process " << op_name;
continue;
}

2 changes: 1 addition & 1 deletion paddle/fluid/framework/new_executor/new_ir_interpreter.cc
@@ -192,7 +192,7 @@ FetchList NewIRInterpreter::Run(const std::vector<std::string>& feed_names,
local_scope_,
value_2_var_name_map_,
execution_config_);
-  SetFeedVarsInplaceSkip(feed_names);
+  // SetFeedVarsInplaceSkip(feed_names);
// convert vec func_list to graph
Convert(&op_func_nodes);
UpdateSyncOpNum();
1 change: 0 additions & 1 deletion paddle/fluid/framework/new_executor/standalone_executor.cc
@@ -69,7 +69,6 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
if (FLAGS_enable_new_ir_in_executor) {
VLOG(6) << "begin to translate" << std::endl;
auto base_program = paddle::TranslateLegacyProgramToProgram(*program);
-
auto kernel_program =
paddle::dialect::PdOpLowerToKernelPass(base_program.get());

1 change: 1 addition & 0 deletions paddle/fluid/ir/dialect/pd_op.yaml
@@ -2,6 +2,7 @@
inputs: []
attrs:
- {typename: str, name: name}
+ - {typename: int, name: col}
outputs:
- {typename: Tensor, name: out, optional: false, intermediate: false}
no_need_buffer: null
51 changes: 30 additions & 21 deletions paddle/fluid/ir/pass/pd_op_to_kernel_pass.cc
@@ -35,6 +35,9 @@ phi::KernelKey GetKernelKey(
ir::Operation* op,
const phi::Place& place,
const std::unordered_map<ir::Value, ir::OpResult>& map_value_pair) {
+  if (op->name() == "pd.feed") {
+    return {phi::Backend::CPU, phi::DataLayout::ANY, phi::DataType::FLOAT32};
+  }
phi::Backend kernel_backend = phi::Backend::UNDEFINED;
phi::DataLayout kernel_layout = phi::DataLayout::UNDEFINED;
phi::DataType kernel_data_type = phi::DataType::UNDEFINED;
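
Aside: the pd.feed special case above exists because GetKernelKey normally infers the key from an op's operands, and a feed op has no operands to inspect. A minimal standalone sketch of that fallback pattern, with simplified enums standing in for Paddle's real phi types:

// Standalone sketch (simplified types, not Paddle's real API): source ops
// with no operands get a fixed kernel key instead of operand-based inference.
#include <cassert>
#include <string>
#include <vector>

enum class Backend { UNDEFINED, CPU };
enum class DataType { UNDEFINED, FLOAT32 };
struct KernelKey {
  Backend backend;
  DataType dtype;
};

KernelKey GetKernelKeySketch(const std::string& op_name,
                             const std::vector<DataType>& operand_dtypes) {
  if (op_name == "pd.feed") {
    // Nothing to infer from: fall back to a fixed key, as the diff does.
    return {Backend::CPU, DataType::FLOAT32};
  }
  // Ordinary ops derive the key from their operands.
  return {Backend::CPU, operand_dtypes.at(0)};
}

int main() {
  assert(GetKernelKeySketch("pd.feed", {}).dtype == DataType::FLOAT32);
  assert(GetKernelKeySketch("pd.matmul", {DataType::FLOAT32}).backend ==
         Backend::CPU);
  return 0;
}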
@@ -110,7 +113,9 @@ phi::KernelKey GetKernelKey(
continue;
}
auto input_tmp = op->operand(i).source();
+
auto new_input_tmp = map_value_pair.at(input_tmp);
+
auto input_type = new_input_tmp.type();
dialect::AllocatedDenseTensorType type;
if (input_type.isa<dialect::AllocatedDenseTensorType>()) {
@@ -181,32 +186,34 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {

    std::vector<ir::Type> op_output_types;
    if ((*it)->num_results() > 0) {
-     auto result_type = (*it)->result(0).type();
-     if (result_type.isa<dialect::DenseTensorType>()) {
-       auto allocated_dense_tensor_dtype =
-           paddle::dialect::AllocatedDenseTensorType::get(
-               ctx,
-               phi::TransToPhiPlace(kernel_key.backend()),
-               result_type.dyn_cast<dialect::DenseTensorType>());
-       op_output_types.push_back(allocated_dense_tensor_dtype);
-     } else if (result_type.isa<ir::VectorType>()) {
-       auto pos1 = result_type.dyn_cast<ir::VectorType>().data()[0];
-
-       if (pos1.isa<dialect::DenseTensorType>()) {
+     for (size_t i = 0; i < (*it)->num_results(); ++i) {
+       auto result_type = (*it)->result(i).type();
+       if (result_type.isa<dialect::DenseTensorType>()) {
          auto allocated_dense_tensor_dtype =
              paddle::dialect::AllocatedDenseTensorType::get(
                  ctx,
                  phi::TransToPhiPlace(kernel_key.backend()),
-                 pos1.dyn_cast<dialect::DenseTensorType>());
+                 result_type.dyn_cast<dialect::DenseTensorType>());
          op_output_types.push_back(allocated_dense_tensor_dtype);
-       } else {
-         PADDLE_THROW(phi::errors::Unimplemented(
-             "only support dense tensor in vector type for now"));
+       } else if (result_type.isa<ir::VectorType>()) {
+         auto pos1 = result_type.dyn_cast<ir::VectorType>().data()[0];
+
+         if (pos1.isa<dialect::DenseTensorType>()) {
+           auto allocated_dense_tensor_dtype =
+               paddle::dialect::AllocatedDenseTensorType::get(
+                   ctx,
+                   phi::TransToPhiPlace(kernel_key.backend()),
+                   pos1.dyn_cast<dialect::DenseTensorType>());
+           op_output_types.push_back(allocated_dense_tensor_dtype);
+         } else {
+           PADDLE_THROW(phi::errors::Unimplemented(
+               "only support dense tensor in vector type for now"));
+         }
+
+         ir::Type t1 = ir::VectorType::get(ctx, op_output_types);
+         op_output_types.clear();
+         op_output_types.push_back(t1);
        }
-
-       ir::Type t1 = ir::VectorType::get(ctx, op_output_types);
-       op_output_types.clear();
-       op_output_types.push_back(t1);
      }
    }

Review thread on `auto pos1 = result_type.dyn_cast<ir::VectorType>().data()[0];`:

Contributor: Taking [0] directly here: does this assume that when result_type is a VectorType it always contains at least one element?

phlrain (Collaborator, Author): This question and the one below are related. The original code only supported a single output; I upgraded it to multiple outputs, but this part of the logic was not adapted correctly.

Review thread on `op_output_types.clear();`:

Contributor: Why clear op_output_types here? Each index of op_output_types is supposed to correspond to the position of its result_type, yet this clear() sits inside only one branch. For results [Tensor0, [Tensor1], Tensor2], wouldn't the final op_output_types end up as just [[Tensor1], Tensor2]?

phlrain (Collaborator, Author): This is indeed a bug. The logic is only correct when there is exactly one VectorType result; other scenarios are broken. I will look into how to fix it.

phlrain (Collaborator, Author), Jun 26, 2023: Fixed in PR #54865.

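To make the reviewer's point concrete, here is a minimal standalone C++ sketch (types modeled as plain strings rather than Paddle's ir::Type; the packing logic is paraphrased from the merged loop). Wrapping and clearing the whole accumulator inside the VectorType branch absorbs earlier single-tensor results into the vector type:

#include <cassert>
#include <string>
#include <vector>

int main() {
  // An op whose results are [Tensor0, Vector[Tensor1], Tensor2].
  struct Result {
    std::string type;
    bool is_vector;
  };
  std::vector<Result> results = {
      {"Tensor0", false}, {"Tensor1", true}, {"Tensor2", false}};

  // Accumulation mirroring the merged loop: the VectorType branch wraps
  // everything accumulated so far and then clears the accumulator.
  std::vector<std::string> buggy;
  for (const auto& r : results) {
    buggy.push_back(r.type);
    if (r.is_vector) {
      std::string packed = "Vector[";
      for (size_t i = 0; i < buggy.size(); ++i) {
        packed += (i ? "," : "") + buggy[i];
      }
      packed += "]";
      buggy.clear();  // Tensor0, pushed earlier, is swallowed here.
      buggy.push_back(packed);
    }
  }
  // Positions no longer line up with the op's results, as the reviewer
  // observed: Tensor0 has been absorbed into the vector type.
  assert(buggy == (std::vector<std::string>{"Vector[Tensor0,Tensor1]",
                                            "Tensor2"}));

  // An order-preserving version packs only the current result and never
  // clears the accumulator.
  std::vector<std::string> fixed;
  for (const auto& r : results) {
    fixed.push_back(r.is_vector ? "Vector[" + r.type + "]" : r.type);
  }
  assert(fixed == (std::vector<std::string>{"Tensor0", "Vector[Tensor1]",
                                            "Tensor2"}));
  return 0;
}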
@@ -249,7 +256,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {

// only deal with single output
if ((*it)->num_results() > 0) {
-      map_value_pair[(*it)->result(0)] = op1->result(0);
+      for (size_t i = 0; i < (*it)->num_results(); ++i) {
+        map_value_pair[(*it)->result(i)] = op1->result(i);
+      }
}

program->block()->push_back(op1);
82 changes: 63 additions & 19 deletions paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
@@ -66,6 +66,27 @@ void BuildScope(ir::Block* block,
continue;
}

if (op_name == "pd.feed") {
auto ptr = (*it)->result(0);
std::string name = "inner_var_" + std::to_string(count++);
name_map->emplace(ptr, name);
auto var = scope->Var(name);
// TODO(phlrain): need to update here, support StringTensor
auto out_tensor = var->GetMutable<phi::DenseTensor>();

name_map->emplace(ptr, name);

auto feed_var = scope->Var("feed");
int index =
(*it)->attributes().at("col").dyn_cast<ir::Int32Attribute>().data();
auto feed_list = feed_var->Get<paddle::framework::FeedList>();
auto& in_tensor = (PADDLE_GET(phi::DenseTensor, feed_list.at(index)));

out_tensor->ShareDataWith(in_tensor);

continue;
}

if (op_name == "builtin.combine") {
auto out_value = (*it)->result(0);

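
For context on the pd.feed handling above: the executor packs all user-provided inputs into a single scope variable named "feed" holding a FeedList, and each pd.feed op's col attribute selects one entry, whose storage the op's output tensor then shares. A standalone sketch of that lookup, with plain containers standing in for Scope, Variable, and FeedList:

#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <vector>

// Stand-ins for phi::DenseTensor and paddle::framework::FeedList.
struct DenseTensor {
  std::shared_ptr<std::vector<float>> buffer;  // shared, like ShareDataWith
};
using FeedList = std::vector<DenseTensor>;

int main() {
  // The executor has already packed the user's feeds into one "feed" var.
  std::map<std::string, FeedList> scope;
  scope["feed"] = {
      DenseTensor{std::make_shared<std::vector<float>>(
          std::vector<float>{1.f, 2.f})},
      DenseTensor{std::make_shared<std::vector<float>>(
          std::vector<float>{3.f, 4.f})}};

  // A pd.feed op with col = 1 produces an output tensor whose storage is
  // shared with feed entry 1, mirroring out_tensor->ShareDataWith(in_tensor).
  int col = 1;
  DenseTensor out;
  out.buffer = scope["feed"].at(col).buffer;
  assert(out.buffer->at(0) == 3.f);

  // Only the buffer pointer was copied, so writes through the feed entry
  // remain visible through the op's output.
  scope["feed"].at(col).buffer->at(0) = 5.f;
  assert(out.buffer->at(0) == 5.f);
  return 0;
}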
@@ -162,12 +183,12 @@ void BuildInferMetaContext(
auto runtime_info = std::get<3>(op_yaml_info);

// int input_index = 0;

std::vector<std::string> vec_param_list = runtime_info.infer_meta_param;

for (size_t input_index = 0; input_index < vec_param_list.size();
input_index++) {
auto& t = vec_param_list[input_index];

if (input_index_map.count(t)) {
// get information from input
ir::Value ptr = op->operand(input_index_map[t]).source();
Expand Down Expand Up @@ -197,7 +218,7 @@ void BuildInferMetaContext(
if (var->IsType<phi::DenseTensor>()) {
const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
ctx->EmplaceBackInput(const_cast<phi::TensorBase*>(tensor_in));
-      } else {
+      } else if (var->IsType<paddle::framework::TensorRefArray>()) {
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>
inputs;
auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
@@ -206,6 +227,9 @@
}
}

ctx->EmplaceBackInputs(std::move(inputs));
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented("Not support var type [%d] ",
+                                                var->Type()));
}
}
}
@@ -238,8 +262,7 @@ void BuildInferMetaContext(
}
}

-  // update here, support fetch list for now
-  // [todo update here]
+  // TODO(phlrain): use var type instead of op name
if (op->attributes().count("op_name") &&
(op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().data() ==
"pd.fetch")) {
@@ -249,9 +272,11 @@
auto* out_tensor = &(PADDLE_GET(phi::DenseTensor, fetch_list->at(0)));
ctx->EmplaceBackOutput(out_tensor);
} else {
-    ir::Value out_ptr = op->result(0);
-    auto name = name_map.at(out_ptr);
-    ctx->EmplaceBackOutput(scope->Var(name)->Get<phi::DenseTensor>());
+    for (size_t i = 0; i < op->num_results(); ++i) {
+      ir::Value out_ptr = op->result(i);
+      auto name = name_map.at(out_ptr);
+      ctx->EmplaceBackOutput(scope->Var(name)->Get<phi::DenseTensor>());
+    }
}
}

@@ -293,10 +318,14 @@ void BuildPhiKernelContext(
// get information from input
ir::Value ptr = op->operand(input_index_map[t]).source();
auto in_var_name = name_map.at(ptr);

if (input_map != nullptr) {
// only deal with single input for now, [todo] need support multi input
// like concat
+      // TODO(phlrain): OpFuncNode need input_index and output_index,
+      // construct input_index and output_here, should remove input_index and
+      // output_index from OpFuncNode Each in_var_name named "inner_var_" +
+      // index, len("inner_var_") = 10
+
size_t tmp_id = std::atol(in_var_name.substr(4, 100).c_str());
(*input_map)[std::to_string(input_index_map.at(t))].push_back(tmp_id);
}
@@ -331,14 +360,21 @@ void BuildPhiKernelContext(
if (var->IsType<phi::DenseTensor>()) {
const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
ctx->EmplaceBackInput(tensor_in);
-      } else {
+      } else if (var->IsType<paddle::framework::TensorRefArray>()) {
paddle::small_vector<const phi::TensorBase*> inputs;
auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
for (size_t i = 0; i < tensor_array.size(); ++i) {
inputs.emplace_back(tensor_array[i]);
}

ctx->EmplaceBackInputs(std::move(inputs));
+      } else if (var->IsType<paddle::framework::FeedList>()) {
+        auto feed_list = var->Get<paddle::framework::FeedList>();
+        auto* in_tensor = &(PADDLE_GET(phi::DenseTensor, feed_list.at(0)));
+        ctx->EmplaceBackOutput(in_tensor);
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented("Not support var type [%d] ",
+                                                var->Type()));
}
}
}
@@ -371,6 +407,7 @@ void BuildPhiKernelContext(
}
}

+  // TODO(phlrain): use var type instead of op name
if (op->attributes().count("op_name") &&
(op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().data() ==
"pd.fetch")) {
@@ -380,16 +417,23 @@
auto* out_tensor = &(PADDLE_GET(phi::DenseTensor, fetch_list->at(0)));
ctx->EmplaceBackOutput(out_tensor);
} else {
-    ir::Value out_ptr = op->result(0);
-    auto name = name_map.at(out_ptr);
-    ctx->EmplaceBackOutput(const_cast<phi::DenseTensor*>(
-        &(scope->Var(name)->Get<phi::DenseTensor>())));
-
-    if (output_map != nullptr) {
-      // only deal with single input for now, [todo] need support multi input
-      // like concat
-      size_t tmp_id = std::atol(name.substr(4, 100).c_str());
-      (*output_map)["out"].push_back(tmp_id);
+    for (size_t i = 0; i < op->num_results(); ++i) {
+      ir::Value out_ptr = op->result(i);
+      auto name = name_map.at(out_ptr);
+      ctx->EmplaceBackOutput(const_cast<phi::DenseTensor*>(
+          &(scope->Var(name)->Get<phi::DenseTensor>())));
+
+      if (output_map != nullptr) {
+        // only deal with single input for now, [todo] need support multi input
+        // like concat
+        // TODO(phlrain): OpFuncNode need input_index and output_index,
+        // construct input_index and output_here, should remove input_index and
+        // output_index from OpFuncNode Each in_var_name named "inner_var_" +
+        // index, len("inner_var_") = 10
+
+        size_t tmp_id = std::atol(name.substr(4, 100).c_str());
+        (*output_map)["out"].push_back(tmp_id);
+      }
}
}
}
2 changes: 2 additions & 0 deletions paddle/fluid/ir_adaptor/translator/op_translator.cc
@@ -540,6 +540,8 @@ ir::Operation* FeedOpHandler(ir::IrContext* ctx,
GenerateOperationOutput(ctx, op_desc, output_infos);
ir::AttributeMap attribute_map = {
{"name", ir::StrAttribute::get(ctx, op_desc.OutputArgumentNames()[0])},
{"col",
ir::Int32Attribute::get(ctx, op_desc.GetAttrIfExists<int>("col"))},
};

ir::Operation* operation =
1 change: 1 addition & 0 deletions paddle/phi/api/yaml/op_compat.yaml
@@ -297,6 +297,7 @@
out : Out

- op : atan2
+  backward : atan2_grad
inputs :
{x : X1, y : X2}
outputs :
1 change: 1 addition & 0 deletions python/paddle/fluid/executor.py
@@ -1635,6 +1635,7 @@ def _can_use_interpreter_core(program, place):
)

self._feed_data(program, feed, feed_var_name, scope)
+
if hasattr(program, 'lr_scheduler'):
from paddle.optimizer.lr import LRScheduler

55 changes: 43 additions & 12 deletions test/ir/new_ir/test_standalone_new_ir.py
@@ -27,13 +27,15 @@ def test_with_new_ir(self):
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)

-        x = paddle.ones([2, 2], dtype="float32")
-        y = paddle.ones([2, 2], dtype="float32")
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.ones([2, 2], dtype="float32")
+                y = paddle.ones([2, 2], dtype="float32")

-        z = x + y
-        out = exe.run(
-            paddle.static.default_main_program(), {}, fetch_list=[z.name]
-        )
+                z = x + y
+                out = exe.run(main_program, {}, fetch_list=[z.name])

gold_res = np.ones([2, 2], dtype="float32") * 2

@@ -45,18 +47,47 @@ def test_with_new_ir(self):
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)

-        x = paddle.ones([2, 2], dtype="float32")
-        y = paddle.ones([2, 2], dtype="float32")
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.ones([2, 2], dtype="float32")
+                y = paddle.ones([2, 2], dtype="float32")

-        z = paddle.linalg.multi_dot([x, y])
-        out = exe.run(
-            paddle.static.default_main_program(), {}, fetch_list=[z.name]
-        )
+                z = paddle.linalg.multi_dot([x, y])
+                out = exe.run(main_program, {}, fetch_list=[z.name])

gold_res = np.ones([2, 2], dtype="float32") * 2

np.testing.assert_array_equal(out[0], gold_res)


+class TestFeedOp(unittest.TestCase):
+    def test_with_new_ir(self):
+        place = paddle.CPUPlace()
+        exe = paddle.static.Executor(place)
+
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.static.data("x", [2, 2], dtype="float32")
+                y = paddle.static.data("y", [2, 2], dtype="float32")
+
+                z = x + y
+
+                np_a = np.random.rand(2, 2).astype("float32")
+                np_b = np.random.rand(2, 2).astype("float32")
+                out = exe.run(
+                    main_program,
+                    feed={"x": np_a, "y": np_b},
+                    fetch_list=[z.name],
+                )
+
+                gold_res = np_a + np_b
+
+                np.testing.assert_array_equal(out[0], gold_res)


if __name__ == "__main__":
unittest.main()