Using small vector for slot and merge edge into grad_slot_meta (#42350)
JiabinYang authored Apr 29, 2022
1 parent 27cf7af commit 2bee99d
Showing 35 changed files with 432 additions and 419 deletions.
10 changes: 6 additions & 4 deletions paddle/fluid/distributed/collective/reducer.cc
@@ -447,10 +447,12 @@ void EagerReducer::TraverseBackwardGraph(const std::vector<Tensor> &outputs) {
while (!queue.empty()) {
egr::GradNodeBase *node = queue.front();
queue.pop();
const std::vector<std::vector<egr::Edge>> &edges = node->GetEdges();
for (size_t i = 0; i < edges.size(); i++) {
for (size_t j = 0; j < edges[i].size(); j++) {
const egr::Edge &edge = edges[i][j];
const paddle::small_vector<std::vector<egr::GradSlotMeta>,
egr::kSlotSmallVectorSize> &metas =
node->OutputMeta();
for (size_t i = 0; i < metas.size(); i++) {
for (size_t j = 0; j < metas[i].size(); j++) {
const egr::Edge &edge = metas[i][j].GetEdge();
auto next_node_shared = edge.GetMutableGradNode();
if (!next_node_shared || !next_node_shared.get()) {
continue;
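The hunk above is the consumer side of the change: instead of asking the node for a separate edge list via GetEdges(), the traversal reads each slot's GradSlotMeta and takes the edge that now lives inside it. A minimal sketch of that new pattern, using only the accessors visible in this diff (OutputMeta, GetEdge, GetMutableGradNode) and assuming the usual eager grad-node headers are included:

    #include <queue>
    // Assumes the headers declaring egr::GradNodeBase, egr::GradSlotMeta and egr::Edge.

    // Illustrative sketch, not the full reducer: push every next node reachable
    // from `node` through its output slot metadata.
    void VisitNextNodes(egr::GradNodeBase* node,
                        std::queue<egr::GradNodeBase*>* queue) {
      const paddle::small_vector<std::vector<egr::GradSlotMeta>,
                                 egr::kSlotSmallVectorSize>& metas = node->OutputMeta();
      for (size_t slot = 0; slot < metas.size(); ++slot) {        // one entry per output slot
        for (size_t rank = 0; rank < metas[slot].size(); ++rank) {
          const egr::Edge& edge = metas[slot][rank].GetEdge();    // edge merged into the slot meta
          auto next_node_shared = edge.GetMutableGradNode();
          if (!next_node_shared || !next_node_shared.get()) {
            continue;                                             // dangling edge, nothing to visit
          }
          queue->push(next_node_shared.get());
        }
      }
    }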
16 changes: 10 additions & 6 deletions paddle/fluid/eager/accumulation/accumulation_node.cc
@@ -38,10 +38,13 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
}
}

std::vector<std::vector<paddle::experimental::Tensor>> GradNodeAccumulation::
operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph, bool is_new_grad) {
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
GradNodeAccumulation::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph,
bool is_new_grad) {
VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation";
PADDLE_ENFORCE(grads.size() == 1,
paddle::platform::errors::Fatal(
@@ -56,8 +59,9 @@ operator()(
// Apply Gradient Hooks
paddle::experimental::Tensor grad_out;
if (GradientHooksRegistered()) {
std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
ApplyGradientHooks(grads);
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
hooked_grads = ApplyGradientHooks(grads);
grad_out = hooked_grads[0][0];
} else {
grad_out = grads[0][0];
9 changes: 6 additions & 3 deletions paddle/fluid/eager/accumulation/accumulation_node.h
@@ -37,9 +37,12 @@ class GradNodeAccumulation : public GradNodeBase {
}

// Functor: perform backward computations
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph = false, bool is_new_grad = false) override;
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false,
bool is_new_grad = false) override;

void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }

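For callers, the only visible difference in this declaration is the container type of grads and of the return value; the accumulation logic is untouched. A hypothetical call site, sketched on the assumption that a single slot holding a single gradient tensor is passed in (`node` and `grad_tensor` are illustrative names; the types come from the signature above):

    // Illustrative only: invoke an existing accumulation node with one slot / one grad.
    // `node` is assumed to be a GradNodeAccumulation reached from a leaf tensor's autograd
    // meta, and `grad_tensor` an already-computed paddle::experimental::Tensor.
    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        grads(1);                      // one input slot
    grads[0].push_back(grad_tensor);   // one gradient tensor in that slot
    auto outs = node(grads, /*create_graph=*/false, /*is_new_grad=*/false);
    // `outs` mirrors the small_vector-of-vector layout of `grads`.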
8 changes: 4 additions & 4 deletions paddle/fluid/eager/amp_utils.h
@@ -21,8 +21,8 @@ namespace egr {

static inline paddle::experimental::DataType GetPromoteType(
const std::string& op_name,
const std::vector<std::vector<paddle::experimental::Tensor>>&
amp_tensors_vector,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& amp_tensors_vector,
const paddle::experimental::DataType& amp_dtype) {
auto dst_type = amp_dtype;
if (egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype() ==
@@ -86,8 +86,8 @@ static inline paddle::experimental::DataType GetPromoteType(

inline paddle::experimental::DataType GetAmpDestDtype(
const std::string& op_name,
const std::vector<std::vector<paddle::experimental::Tensor>>&
amp_tensors_vector) {
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& amp_tensors_vector) {
auto amp_dtype =
egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype();
auto amp_level = egr::Controller::Instance().GetAMPLevel();
@@ -144,27 +144,34 @@ void GradNodeScale::SetTensorWrappers_X(

void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; }

std::vector<std::vector<paddle::experimental::Tensor>> GradNodeScale::
operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph, bool is_new_grad) {
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
GradNodeScale::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph,
bool is_new_grad) {
// 1. Check Output Size
VLOG(6) << "grad size is: " << grads.size();
PADDLE_ENFORCE(
((grads.size() == 1) && (grads[0].size() == 1)),
paddle::platform::errors::Fatal(
"ScaleGradNode takes exactly 1 grad tensor."
"However received: %d",
"This indicates an issue with Eager Dygraph Backward logic",
grads.size()));
std::vector<std::vector<paddle::experimental::Tensor>> outs;
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
outs;
// 2. Create needed out pattern
paddle::experimental::Tensor out;
// Apply Gradient Hooks
if (GradientHooksRegistered()) {
// TODO(jiabin): Shall we apply hook slot by slot here or accept
// vector<vector<phi::tensor>> to apply all hooks?
std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
ApplyGradientHooks(grads);
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
hooked_grads = ApplyGradientHooks(grads);
ScaleAPI(/* slot by slot set */ hooked_grads[0][0], scale_, 0.0 /* bias */,
true /* bias_after_scale */, &out);
} else {
@@ -38,17 +38,20 @@ class GradNodeScale : public GradNodeBase {
~GradNodeScale() override = default;

// Functor: perform backward computations
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph = false, bool is_new_grad = false) override;
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false,
bool is_new_grad = false) override;

void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }

void SetTensorWrappers_X(
const std::vector<paddle::experimental::Tensor>& tensors);

void SetAttributes_scale(float scale);
std::string name() override { return ""; }
std::string name() override { return "scale node"; }
// Members: define fwd input tensors
// For Scale there is no fwd input tensor needed

@@ -79,9 +79,6 @@ paddle::experimental::Tensor scale(const paddle::experimental::Tensor& x,
// Pass Attributes to GradNode
scale_node->SetAttributes_scale(scale);

// Set Next Edges
scale_node->AddEdges(p_autograd_in, /*slot id*/ 0);

// Set TensorWrappers
scale_node->SetTensorWrappers_X({x});

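The deletion above is the forward-side half of "merge edge into grad_slot_meta": forward functions no longer call AddEdges, and the backward edge is assumed to be recorded when the grad-out metadata is set. A hedged sketch of the resulting node-creation sequence; the edge-wiring behaviour of SetGradOutMeta is the assumption here, while the other calls appear in this file and in the generator templates below:

    // Sketch only (not the full scale() forward): grad-node setup with no AddEdges step.
    auto scale_node = std::make_shared<GradNodeScale>(/*bwd_in_slot_num=*/1,
                                                      /*bwd_out_slot_num=*/1);
    scale_node->SetAttributes_scale(scale);
    scale_node->SetGradOutMeta(x, /*slot id*/ 0);  // assumed to also wire the backward edge now
    scale_node->SetTensorWrappers_X({x});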
3 changes: 2 additions & 1 deletion paddle/fluid/eager/api/utils/global_utils.h
@@ -19,8 +19,9 @@
#include <memory>
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/api/ext/op_meta_info.h"
#include "paddle/utils/small_vector.h"
namespace egr {

constexpr size_t kSlotSmallVectorSize = 15U;
class UniqueNameGenerator {
public:
explicit UniqueNameGenerator(std::string prefix = "") : prefix_(prefix) {}
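kSlotSmallVectorSize is the inline capacity used by every per-slot container introduced in this change: a node with at most 15 slots keeps its slot vector inside the object and never allocates for the outer container. A minimal sketch of that behaviour, assuming paddle::small_vector follows the usual SmallVector contract (inline buffer for the first N elements, heap growth beyond):

    #include "paddle/utils/small_vector.h"

    // Illustrative only: the first kSlotSmallVectorSize (15) elements use the inline buffer.
    paddle::small_vector<std::vector<float>, egr::kSlotSmallVectorSize> slots;
    for (size_t i = 0; i < egr::kSlotSmallVectorSize; ++i) {
      slots.emplace_back();  // stays in inline storage; no allocation for the outer container
    }
    slots.emplace_back();    // 16th slot: the container now grows onto the heap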
31 changes: 15 additions & 16 deletions paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -1187,11 +1187,6 @@ static std::string GenerateGradNodeCreationContent(
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_OUT_META_TEMPLATE, input_name, input_position);

const char* ADD_EDGES_TEMPLATE =
" if(%s) grad_node->AddEdges(%s, %d);\n";
grad_node_creation_str +=
paddle::string::Sprintf(ADD_EDGES_TEMPLATE, input_autograd_name,
input_autograd_name, input_position);
} else {
compute_require_grad_args += ", &" + input_autograd_name;
size_t input_position = fwd_inputs_name_pos_map.at(input_name);
@@ -1200,10 +1195,6 @@
" grad_node->SetGradOutMeta(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_OUT_META_TEMPLATE, input_name, input_position);

const char* ADD_EDGES_TEMPLATE = " grad_node->AddEdges(&%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
ADD_EDGES_TEMPLATE, input_autograd_name, input_position);
}
}

@@ -1649,7 +1640,8 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
std::string amp_logic_str = "";
if (in_vars.size() != 0) {
const char* AMP_TENSORS_VECTOR_TEMPLATE =
" std::vector<std::vector<paddle::experimental::Tensor>> "
" paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize> "
"amp_tensors_vector = { "
"%s };\n";
std::string amp_tensors_vector = paddle::string::Sprintf(
@@ -2428,9 +2420,11 @@ static std::string GenerateGradNodeCCContents(
}

const char* BWD_RETURN_TEMPLATE =
" std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads = "
" paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize> hooked_grads = "
"GradNode%s::ApplyGradientHooks(grads);\n"
" std::vector<std::vector<paddle::experimental::Tensor>> outputs(%d);\n"
" paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize> outputs(%d);\n"
" %s\n"
" if(NeedComplexToRealConversion()) "
"HandleComplexGradToRealGrad(&outputs);\n"
@@ -2441,9 +2435,11 @@

// [Generation] Get Full Grad Function
const char* GRAD_FUNCTION_TEMPLATE =
"std::vector<std::vector<paddle::experimental::Tensor>> "
"paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize> "
"GradNode%s::operator()("
"std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool "
"paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize>& grads, bool "
"create_graph, bool is_new_grad) {\n"
"%s"
"%s"
@@ -2487,9 +2483,12 @@ static std::string GenerateGradNodeHeaderContents(
"Construct GradNode%s \"; }\n"
" ~GradNode%s() override { VLOG(6) << \" Destruct GradNode%s \"; }\n"
"\n"
" virtual std::vector<std::vector<paddle::experimental::Tensor>> "
" virtual "
"paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize> "
"operator()("
"std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool "
"paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize>& grads, bool "
"create_graph = false, bool is_new_grad = false) "
"override;\n"
"\n"
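Filling GRAD_FUNCTION_TEMPLATE and BWD_RETURN_TEMPLATE above for a hypothetical single-slot op (the op name "relu" and the slot count are examples; the generated body placeholders are elided) now yields a definition shaped roughly like this:

    // Rough shape of the generated node after this change (example op name only).
    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
    GradNoderelu::operator()(
        paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                             egr::kSlotSmallVectorSize>& grads,
        bool create_graph, bool is_new_grad) {
      // ...generated fill-zero / attribute / grad-op call body...
      paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                           egr::kSlotSmallVectorSize>
          hooked_grads = GradNoderelu::ApplyGradientHooks(grads);
      paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                           egr::kSlotSmallVectorSize>
          outputs(1);  // one backward output slot in this example
      // ...compute gradients from hooked_grads into `outputs`...
      if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&outputs);
      return outputs;
    }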
@@ -118,8 +118,8 @@ class {} : public egr::GradNodeBase {{
egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {{}}
~{}() override = default;
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph = false, bool is_new_grad = false) override;
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>& grads, bool create_graph = false, bool is_new_grad = false) override;
std::string name() override {{ return \"{}\"; }}
void ClearTensorWrappers() override {{
@@ -149,7 +149,7 @@ class {} : public egr::GradNodeBase {{

GRAD_FUNCTION_TEMPLATE = \
"""
std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph, bool is_new_grad) {{
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> {}::operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>& grads, bool create_graph, bool is_new_grad) {{
// Fill Zero For GradIn Tensors
{}
@@ -239,7 +239,6 @@ class {} : public egr::GradNodeBase {{
// Set TensorWrappers for Forward Inputs
{}
// SetGradOutMeta & SetEdges
{}
{}
// SetOutRank & SetHistory & SetGradInMeta & RetainGrad
{}
@@ -356,7 +355,7 @@ class {} : public egr::GradNodeBase {{
if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {{
VLOG(5) << "Check and Prepare For AMP";
{}
std::vector<std::vector<paddle::experimental::Tensor>> amp_tensors_vector = {};
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_tensors_vector = {};
{}
{}
{}
@@ -769,15 +768,11 @@ def GenerateNodeCreationCodes(self):
is_optional = (name in self.optional_inputs)
if is_optional:
set_grad_out_meta = f"{indent}if({name}.get_ptr() != nullptr) grad_node->SetGradOutMeta(*({name}.get_ptr()), {pos});"
set_edges = f"{indent}if({name}.get_ptr() != nullptr) grad_node->AddEdges({input_autograd_meta_name}, {pos});"
else:
set_grad_out_meta = f"{indent}grad_node->SetGradOutMeta({name}, {pos});"
set_edges = f"{indent}grad_node->AddEdges({input_autograd_meta_name}, {pos});"

set_grad_out_meta_list.append(set_grad_out_meta)
set_edges_list.append(set_edges)
set_grad_out_meta_str = "\n".join(set_grad_out_meta_list)
set_edges_str = "\n".join(set_edges_list)

# SetOutRank & SetHistory & SetGradInMeta
set_out_rank_list = []
@@ -808,7 +803,7 @@ def GenerateNodeCreationCodes(self):
self.node_creation_str = FORWARD_BODY_TEMPLATE.format(
node_creation_event_str, pass_stop_gradient_args_str,
node_construction_str, set_attributes_str,
set_input_tensor_wrappers_str, set_grad_out_meta_str, set_edges_str,
set_input_tensor_wrappers_str, set_grad_out_meta_str,
set_out_rank_str, set_history_str, set_grad_in_meta_str,
set_retain_grad_str, set_output_tensor_wrappers_str)

@@ -1454,7 +1449,7 @@ def GenerateNodeDefinition(self, grad_node_creation_str):

# Construct grad_api returns
slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys())
returns_str = f"{indent}std::vector<std::vector<paddle::experimental::Tensor>> returns({slot_num_bwd_outputs});\n"
returns_str = f"{indent}paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs});\n"
for name, (ttype, fwd_position,
grad_api_position) in backward_grad_outputs_map.items():
transformed_tensor_name = self.TransformToNextGradName(name)