
Commit e3ddd48

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop

chenwhql committed Feb 11, 2022
2 parents: 887892c + a117497
Showing 175 changed files with 7,282 additions and 1,752 deletions.
10 changes: 9 additions & 1 deletion CMakeLists.txt
@@ -242,6 +242,15 @@ option(NEW_RELEASE_CUBIN "PaddlePaddle next-level release strategy for pypi cu
option(NEW_RELEASE_JIT "PaddlePaddle next-level release strategy for backup jit package" OFF)
option(WITH_ASCEND_INT64 "Compile with int64 kernel for ascend NPU" OFF)
option(WITH_POCKETFFT "Compile with pocketfft support" ON)
option(WITH_RECORD_BUILDTIME "Compile PaddlePaddle with record all targets build time" OFF)

if(WITH_RECORD_BUILDTIME)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CMAKE_CURRENT_SOURCE_DIR}/tools/get_build_time.sh")
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "${CMAKE_CURRENT_SOURCE_DIR}/tools/get_build_time.sh")
else()
include(ccache) # set ccache for compilation ; if WITH_RECORD_BUILDTIME=ON can't use ccache
endif()
unset(WITH_RECORD_BUILDTIME CACHE)

# PY_VERSION
if(NOT PY_VERSION)
@@ -382,7 +391,6 @@ if(WITH_PROFILER)
add_definitions(-DWITH_GPERFTOOLS)
endif()

include(ccache) # set ccache for compilation
include(util) # set unittest and link libs
include(version) # set PADDLE_VERSION
include(coveralls) # set code coverage
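The new WITH_RECORD_BUILDTIME option works through CMake's RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK global properties, which prefix every compile and link command with the given launcher script; because ccache wraps the compiler the same way, the two mechanisms are mutually exclusive, hence the else() branch above. The real launcher is the shell script tools/get_build_time.sh (not shown in this diff); the Python sketch below only illustrates the mechanism, and its log path and output format are assumptions, not the script's actual behavior.

    #!/usr/bin/env python
    # Illustrative stand-in for tools/get_build_time.sh (the real launcher
    # is a shell script). CMake invokes a rule launcher as:
    #   <launcher> <real-command> <args...>
    # The log path and output format below are assumptions.
    import subprocess
    import sys
    import time

    start = time.time()
    ret = subprocess.run(sys.argv[1:]).returncode  # run the real compile/link
    elapsed = time.time() - start
    with open("/tmp/paddle_build_time.log", "a") as f:
        f.write("{:8.2f}s  {}\n".format(elapsed, " ".join(sys.argv[1:3])))
    sys.exit(ret)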
1 change: 0 additions & 1 deletion cmake/cuda.cmake
@@ -2,7 +2,6 @@ if(NOT WITH_GPU)
return()
endif()


if(WITH_NV_JETSON)
add_definitions(-DWITH_NV_JETSON)
set(paddle_known_gpu_archs "53 62 72")
paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt
@@ -24,3 +24,13 @@ add_custom_target(eager_final_state_codegen
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_nodes_h_path} ${nodes_h_path}
VERBATIM
)

set(tmp_python_c_output_path "${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/tmp_eager_final_state_op_function_impl.h")
set(python_c_output_path "${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/eager_final_state_op_function_impl.h")
add_custom_target(eager_final_state_python_c_codegen
COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py"
"--api_yaml_path=${api_yaml_path}"
"--output_path=${tmp_python_c_output_path}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_python_c_output_path} ${python_c_output_path}
VERBATIM
)
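Both generator targets write into a tmp_* header first and then run cmake -E copy_if_different, so the real header's timestamp changes only when the generated code actually changed and dependents are not needlessly rebuilt. A minimal Python sketch of that idiom (the helper name is made up):

    import filecmp
    import os
    import shutil

    def copy_if_different(src, dst):
        # Overwrite dst only when contents differ, so its timestamp (and
        # everything that depends on it) is left alone on a no-op run.
        if os.path.exists(dst) and filecmp.cmp(src, dst, shallow=False):
            return False
        shutil.copyfile(src, dst)
        return True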
paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py (new file)
@@ -0,0 +1,231 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import argparse
from eager_gen import ReadFwdFile, GetForwardFunctionName, ParseYamlForward, DetermineForwardPositionMap

atype_to_parsing_function = {
"bool": "CastPyArg2Boolean",
"int": "CastPyArg2Int",
"long": "CastPyArg2Long",
"float": "CastPyArg2Float",
"string": "CastPyArg2String",
"bool[]": "CastPyArg2Booleans",
"int[]": "CastPyArg2Ints",
"long[]": "CastPyArg2Longs",
"float[]": "CastPyArg2Floats",
"double[]": "CastPyArg2Float64s",
"string[]": "CastPyArg2Strings"
}

atype_to_cxx_type = {
"bool": "bool",
"int": "int",
"long": "long",
"float": "float",
"string": "std::string",
"bool[]": "std::vector<bool>",
"int[]": "std::vector<int>",
"long[]": "std::vector<long>",
"float[]": "std::vector<float>",
"double[]": "std::vector<double>",
"string[]": "std::vector<std::string>"
}


def ParseArguments():
parser = argparse.ArgumentParser(
description='Eager Code Generator Args Parser')
parser.add_argument('--api_yaml_path', type=str)
parser.add_argument('--output_path', type=str)

args = parser.parse_args()
return args


def GetCxxType(atype):
if atype not in atype_to_cxx_type.keys():
assert False

return atype_to_cxx_type[atype]


def FindParsingFunctionFromAttributeType(atype):
if atype not in atype_to_parsing_function.keys():
assert False

return atype_to_parsing_function[atype]


def GeneratePythonCFunction(fwd_api_name, forward_inputs_position_map,
forward_attrs_list, forward_outputs_position_map):
# forward_inputs_position_map = { "name" : [type, fwd_position] }
# forward_outputs_position_map = { "name" : [type, fwd_position] }
# forward_attrs_list = [ [attr_name, attr_type, default_value, orig_position], ...]

# Get EagerTensor from args
# Get dygraph function call args
num_args = len(forward_inputs_position_map.keys()) + len(forward_attrs_list)
num_input_tensors = len(forward_inputs_position_map.keys())
dygraph_function_call_list = ["" for i in range(num_args)]
get_eager_tensor_str = ""
for name, (ttype, pos) in forward_inputs_position_map.items():
get_eager_tensor_str += f" auto& {name} = GetTensorFromArgs(\"{fwd_api_name}\", \"{name}\", args, {pos}, false);\n"
dygraph_function_call_list[pos] = f"{name}"

parse_attributes_str = " paddle::framework::AttributeMap attrs;\n"
# Get Attributes
for name, atype, _, pos in forward_attrs_list:
parsing_function = FindParsingFunctionFromAttributeType(atype)
cxx_type = GetCxxType(atype)
key = f"{name}"

parse_attributes_str += f" PyObject* {name}_obj = PyTuple_GET_ITEM(args, {pos});\n"
parse_attributes_str += f" {cxx_type} {name} = {parsing_function}({name}_obj, \"{fwd_api_name}\", {pos});\n"

dygraph_function_call_list[pos] = f"{name}"
dygraph_function_call_str = ",".join(dygraph_function_call_list)

PYTHON_C_FUNCTION_TEMPLATE = """
static PyObject * eager_final_state_api_{}(PyObject *self, PyObject *args, PyObject *kwargs)
{{
PyThreadState *tstate = nullptr;
try
{{
// Get EagerTensors from args
{}
// Parse Attributes
{}
tstate = PyEval_SaveThread();
auto out = {}({});
PyEval_RestoreThread(tstate);
tstate = nullptr;
return ToPyObject(out);
}}
catch(...) {{
if (tstate) {{
PyEval_RestoreThread(tstate);
}}
ThrowExceptionToPython(std::current_exception());
return nullptr;
}}
}}
"""
python_c_function_str = PYTHON_C_FUNCTION_TEMPLATE.format(
fwd_api_name, get_eager_tensor_str, parse_attributes_str,
GetForwardFunctionName(fwd_api_name), dygraph_function_call_str)

python_c_function_reg_str = f"{{\"final_state_{fwd_api_name}\", (PyCFunction)(void(*)(void))eager_final_state_api_{fwd_api_name}, METH_VARARGS | METH_KEYWORDS, \"C++ interface function for {fwd_api_name} in dygraph.\"}}"

return python_c_function_str, python_c_function_reg_str


def GeneratePythonCWrappers(python_c_function_str, python_c_function_reg_str):

PYTHON_C_WRAPPER_TEMPLATE = """
#pragma once
#include "pybind11/detail/common.h"
#include "paddle/fluid/pybind/op_function_common.h"
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/pybind/exception.h"
#include <Python.h>
namespace paddle {{
namespace pybind {{
{}
static PyMethodDef EagerFinalStateMethods[] = {{
{}
}};
}} // namespace pybind
}} // namespace paddle
"""
python_c_str = PYTHON_C_WRAPPER_TEMPLATE.format(python_c_function_str,
python_c_function_reg_str)

return python_c_str


def GeneratePythonCFile(filepath, python_c_str):
with open(filepath, 'a') as f:
f.write(python_c_str)


if __name__ == "__main__":
args = ParseArguments()

api_yaml_path = args.api_yaml_path
fwd_api_list = ReadFwdFile(api_yaml_path)

python_c_function_list = []
python_c_function_reg_list = []
for fwd_api in fwd_api_list:
# We only generate Ops with grad
if 'backward' not in fwd_api.keys():
continue

assert 'api' in fwd_api.keys()
assert 'args' in fwd_api.keys()
assert 'output' in fwd_api.keys()
assert 'backward' in fwd_api.keys()

fwd_api_name = fwd_api['api']
fwd_args_str = fwd_api['args']
fwd_returns_str = fwd_api['output']

# Collect Original Forward Inputs/Outputs and then perform validation checks
forward_inputs_list, forward_attrs_list, forward_returns_list = ParseYamlForward(
fwd_args_str, fwd_returns_str)
print("Parsed Original Forward Inputs List: ", forward_inputs_list)
print("Prased Original Forward Attrs List: ", forward_attrs_list)
print("Parsed Original Forward Returns List: ", forward_returns_list)

forward_inputs_position_map, forward_outputs_position_map = DetermineForwardPositionMap(
forward_inputs_list, forward_returns_list)
print("Generated Forward Input Position Map: ",
forward_inputs_position_map)
print("Generated Forward Output Position Map: ",
forward_outputs_position_map)

python_c_function_str, python_c_function_reg_str = GeneratePythonCFunction(
fwd_api_name, forward_inputs_position_map, forward_attrs_list,
forward_outputs_position_map)
python_c_function_list.append(python_c_function_str)
python_c_function_reg_list.append(python_c_function_reg_str)
print("Generated Python-C Function: ", python_c_function_str)

python_c_function_reg_list.append("{nullptr,nullptr,0,nullptr}")
python_c_functions_str = "\n".join(python_c_function_list)
python_c_functions_reg_str = ",\n".join(python_c_function_reg_list)

python_c_str = GeneratePythonCWrappers(python_c_functions_str,
python_c_functions_reg_str)
print("Generated Python-C Codes: ", python_c_str)

output_path = args.output_path
for path in [output_path]:
if os.path.exists(path):
os.remove(path)

GeneratePythonCFile(output_path, python_c_str)
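Each generated wrapper releases the GIL with PyEval_SaveThread() around the dygraph call and restores it before converting the result back to a PyObject, and the registration list is closed with the standard {nullptr,nullptr,0,nullptr} PyMethodDef sentinel. To inspect what the generator emits, GeneratePythonCFunction can be driven by hand; the "scale" API below is hypothetical, with the map and list shapes taken from the comments at the top of that function:

    # Hypothetical input; shapes follow the comments in GeneratePythonCFunction.
    forward_inputs_position_map = {"x": ["Tensor", 0]}    # {name: [type, position]}
    forward_attrs_list = [["scale", "float", "1.0", 1]]   # [name, type, default, position]
    forward_outputs_position_map = {"out": ["Tensor", 0]}

    fn_str, reg_str = GeneratePythonCFunction(
        "scale", forward_inputs_position_map, forward_attrs_list,
        forward_outputs_position_map)
    print(fn_str)   # static PyObject * eager_final_state_api_scale(...)
    print(reg_str)  # method-table entry exposed as "final_state_scale"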
18 changes: 16 additions & 2 deletions paddle/fluid/framework/details/eager_deletion_op_handle.cc
@@ -107,6 +107,21 @@ void EagerDeletionOpHandle::CallOnce() {

std::string EagerDeletionOpHandle::Name() const { return "eager_deletion"; }

static bool CanBeErased(ir::MemOptVarInfo *var_info) {
if (var_info->IsSkippedAllMemoryOptimization() ||
!var_info->DecreaseRefCnt()) {
return false;
}
#ifdef PADDLE_WITH_CINN
// if parent_holder exists, it should meet deletion condition too.
std::shared_ptr<ir::MemOptVarInfo> parent_holder = var_info->ParentHolder();
if (parent_holder && !CanBeErased(parent_holder.get())) {
return false;
}
#endif
return true;
}

void EagerDeletionOpHandle::RunImpl() {
if (vars_.size() != var_infos_.size() || is_variant_scope_) {
vars_.clear();
@@ -117,8 +132,7 @@ void EagerDeletionOpHandle::RunImpl() {
std::deque<std::shared_ptr<memory::Allocation>> garbages;
for (size_t i = 0; i < var_infos_.size(); ++i) {
auto *var_info = var_infos_[i];
if (var_info->IsSkippedAllMemoryOptimization() ||
!var_info->DecreaseRefCnt()) {
if (!CanBeErased(var_info)) {
VLOG(4) << "skip memory optimization with var: " << var_info->Name();
continue;
}
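CanBeErased factors the reference-count check out of RunImpl and, when built with CINN, makes it recursive: a variable shared into a CINN subgraph may only be garbage-collected once its own reference count has dropped to zero and the variable it mirrors in the main graph (the parent holder, added to MemOptVarInfo further down in this diff) is erasable too. A toy Python model of the condition, assuming DecreaseRefCnt returns true when the count reaches zero:

    # Toy model of the erasure condition in EagerDeletionOpHandle (real code is C++).
    class VarInfo:
        def __init__(self, ref_cnt, skip_all_mem_opt=False, parent=None):
            self.ref_cnt = ref_cnt
            self.skip_all_mem_opt = skip_all_mem_opt
            self.parent = parent  # same variable's info in the main graph

        def decrease_ref_cnt(self):
            self.ref_cnt -= 1
            return self.ref_cnt == 0

    def can_be_erased(info):
        if info.skip_all_mem_opt or not info.decrease_ref_cnt():
            return False
        # With CINN enabled, the parent holder must be erasable as well.
        return info.parent is None or can_be_erased(info.parent)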
paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc
@@ -334,6 +334,9 @@ int EmbeddingEltwiseLayerNormFusePass::BuildFusion(

if (end_patter_layernorms[k]->Op()->HasAttr("out_threshold")) {
new_op_desc.SetAttr("enable_int8", true);
new_op_desc.SetAttr(
"out_threshold",
end_patter_layernorms[k]->Op()->GetAttr("out_threshold"));
}

auto* embedding_eltwise_layernorm = graph->CreateOpNode(&new_op_desc);
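The change above propagates the quantization attribute along with the flag: previously the fused op was marked enable_int8 without receiving out_threshold, so the int8 path lost the threshold recorded on the trailing layer_norm op. A toy sketch of the intent, with plain dicts standing in for OpDesc attributes (names assumed):

    # Toy sketch: propagate quantization metadata when fusing ops.
    def propagate_int8_attrs(src_attrs, fused_attrs):
        if "out_threshold" in src_attrs:
            fused_attrs["enable_int8"] = True
            fused_attrs["out_threshold"] = src_attrs["out_threshold"]  # the fix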
14 changes: 10 additions & 4 deletions paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt
@@ -5,13 +5,19 @@ cc_library(recurrent_op_eager_deletion_pass SRCS recurrent_op_eager_deletion_pas
cc_library(reference_count_pass_helper SRCS reference_count_pass_helper.cc DEPS garbage_collector computation_op_handle var_handle)
cc_library(reference_count_pass SRCS reference_count_pass.cc DEPS computation_op_handle graph graph_helper pass op_graph_view reference_count_pass_helper)

cc_library(eager_deletion_pass SRCS eager_deletion_pass.cc DEPS computation_op_handle
eager_deletion_op_handle graph graph_helper pass conditional_block_op_eager_deletion_pass while_op_eager_deletion_pass recurrent_op_eager_deletion_pass reference_count_pass_helper)
SET(EAGER_DELETETION_PASS_DEPS computation_op_handle eager_deletion_op_handle graph graph_helper pass conditional_block_op_eager_deletion_pass while_op_eager_deletion_pass recurrent_op_eager_deletion_pass reference_count_pass_helper)
if (WITH_CINN)
cc_library(share_varinfo_into_cinn_pass SRCS share_varinfo_into_cinn_pass.cc DEPS pass enforce graph_helper computation_op_handle eager_deletion_op_handle cinn_compiler)
cc_test(share_varinfo_into_cinn_pass_test SRCS share_varinfo_into_cinn_pass_test.cc DEPS share_varinfo_into_cinn_pass parallel_executor cinn_compiler elementwise_add_op mul_op cinn_launch_op)
list(APPEND EAGER_DELETETION_PASS_DEPS share_varinfo_into_cinn_pass)
endif()

cc_library(eager_deletion_pass SRCS eager_deletion_pass.cc DEPS ${EAGER_DELETETION_PASS_DEPS})

cc_library(memory_reuse_pass SRCS memory_reuse_pass.cc DEPS computation_op_handle reference_count_pass_helper share_tensor_buffer_op_handle graph pass multi_devices_helper)

cc_library(buffer_shared_inplace_op_pass SRCS buffer_shared_inplace_op_pass.cc DEPS memory_reuse_pass executor_gc_helper)
cc_library(buffer_shared_cross_op_memory_reuse_pass SRCS buffer_shared_cross_op_memory_reuse_pass.cc DEPS memory_reuse_pass)

cc_library(inplace_addto_op_pass SRCS inplace_addto_op_pass.cc DEPS memory_reuse_pass)

paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc
@@ -285,6 +285,13 @@ void EagerDeletionPass::ApplyImpl(ir::Graph *graph) const {
auto recurrent_op_eager_deletion_pass =
ir::PassRegistry::Instance().Get("recurrent_op_eager_deletion_pass");
recurrent_op_eager_deletion_pass->Apply(graph);

#ifdef PADDLE_WITH_CINN
auto share_varinfo_into_cinn_pass =
ir::PassRegistry::Instance().Get("share_varinfo_into_cinn_pass");
share_varinfo_into_cinn_pass->SetNotOwned(kMemOptVarInfoMapList, &var_infos);
share_varinfo_into_cinn_pass->Apply(graph);
#endif
}

} // namespace ir
@@ -300,3 +307,6 @@ REGISTER_PASS(eager_deletion_pass, paddle::framework::ir::EagerDeletionPass)
USE_PASS(conditional_block_op_eager_deletion_pass);
USE_PASS(while_op_eager_deletion_pass);
USE_PASS(recurrent_op_eager_deletion_pass);
#ifdef PADDLE_WITH_CINN
USE_PASS(share_varinfo_into_cinn_pass);
#endif
paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h
@@ -66,6 +66,12 @@ class MemOptVarInfo {
return skip_memory_reuse_ || skip_all_memory_optimization_;
}

void SetParentHolder(std::shared_ptr<MemOptVarInfo> parent) {
parent_holder_ = parent;
}

std::shared_ptr<MemOptVarInfo> ParentHolder() const { return parent_holder_; }

const std::string &Name() const { return name_; }

private:
@@ -88,6 +94,9 @@ class MemOptVarInfo {
std::atomic<size_t> runtime_ref_cnt_;
bool skip_memory_reuse_{false};
bool skip_all_memory_optimization_{false};
// point to var info of the same variable in the main graph,
// used in external(input/output) variables of a subgraph
std::shared_ptr<MemOptVarInfo> parent_holder_{nullptr};
};

using MemOptVarInfoMapList = std::vector<

1 comment on commit e3ddd48

@paddle-bot-old

Congratulations! Your pull request passed all required CI. You can ask the reviewer(s) to approve and merge. 🎉
