
Commit 98bd465

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add_profile_record_for_dygraph_op
pangyoki committed Jan 24, 2022
2 parents 62ee03a + e106901 commit 98bd465
Showing 83 changed files with 3,108 additions and 365 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -5,6 +5,8 @@ paddle/fluid/API_PR.spec
paddle/fluid/op_use_default_grad_maker_DEV.spec
paddle/fluid/op_use_default_grad_maker_PR.spec
paddle/pten/api/*/api*
paddle/pten/include/*
paddle/pten/extension.h

*.DS_Store
*.vs
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
@@ -36,7 +36,7 @@ ENDIF()

if(NOT DEFINED XPU_BASE_URL)
SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220104")
SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220116")
else()
SET(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
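Since the block above applies the dated default only when XPU_BASE_URL is not already defined, the SDK snapshot can be pinned from outside this file. A minimal sketch, assuming the variable is supplied before xpu.cmake runs (e.g. via -DXPU_BASE_URL=<url> on the CMake command line):

# Hypothetical override (not part of this commit): pre-defining the cache
# variable makes the if(NOT DEFINED ...) branch above skip the dated default.
set(XPU_BASE_URL "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev/20220116"
    CACHE STRING "Pinned XPU SDK snapshot")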
103 changes: 81 additions & 22 deletions cmake/pten_kernel.cmake → cmake/pten.cmake
@@ -12,6 +12,48 @@
# See the License for the specific language governing permissions and
# limitations under the License.

function(generate_unify_header DIR_NAME)
set(options "")
set(oneValueArgs HEADER_NAME SKIP_SUFFIX)
set(multiValueArgs "")
cmake_parse_arguments(generate_unify_header "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})

# get header name and suffix
set(header_name "${DIR_NAME}")
list(LENGTH generate_unify_header_HEADER_NAME generate_unify_header_HEADER_NAME_len)
if(${generate_unify_header_HEADER_NAME_len} GREATER 0)
set(header_name "${generate_unify_header_HEADER_NAME}")
endif()
set(skip_suffix "")
list(LENGTH generate_unify_header_SKIP_SUFFIX generate_unify_header_SKIP_SUFFIX_len)
if(${generate_unify_header_SKIP_SUFFIX_len} GREATER 0)
set(skip_suffix "${generate_unify_header_SKIP_SUFFIX}")
endif()

# generate target header file
set(header_file ${CMAKE_CURRENT_SOURCE_DIR}/include/${header_name}.h)
file(WRITE ${header_file} "// Header file generated by paddle/pten/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n")

# get all top-level headers and write into header file
file(GLOB HEADERS "${CMAKE_CURRENT_SOURCE_DIR}\/${DIR_NAME}\/*.h")
foreach(header ${HEADERS})
if("${skip_suffix}" STREQUAL "")
string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header "${header}")
file(APPEND ${header_file} "#include \"${header}\"\n")
else()
string(FIND "${header}" "${skip_suffix}.h" skip_suffix_found)
if(${skip_suffix_found} EQUAL -1)
string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header "${header}")
file(APPEND ${header_file} "#include \"${header}\"\n")
endif()
endif()
endforeach()
# append header into extension.h
string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header_file "${header_file}")
file(APPEND ${pten_extension_header_file} "#include \"${header_file}\"\n")
endfunction()
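A rough sketch of how this helper might be invoked from paddle/pten/CMakeLists.txt — the actual call sites are outside this diff, so the directory names and the SKIP_SUFFIX value below are illustrative assumptions:

# Hypothetical calls (not from this commit). Each call globs the top-level
# *.h files under the named directory, writes a unified
# paddle/pten/include/<name>.h that re-includes them, and appends that
# header to ${pten_extension_header_file} (presumably paddle/pten/extension.h,
# matching the new .gitignore entries above).
generate_unify_header(backends)
# With SKIP_SUFFIX, any header whose name contains "grad_kernel.h" would be
# left out of the unified header.
generate_unify_header(kernels SKIP_SUFFIX grad_kernel)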

# Callers of kernel_declare need to make sure the input target exists
function(kernel_declare TARGET_LIST)
foreach(kernel_path ${TARGET_LIST})
@@ -103,38 +145,55 @@ function(kernel_library TARGET)
list(LENGTH gpu_srcs gpu_srcs_len)
list(LENGTH xpu_srcs xpu_srcs_len)

if (${common_srcs_len} GREATER 0)
# If the kernel has a device independent public implementation,
# we will use this implementation and will not adopt the implementation
# under specific devices
# Build the target according to the src organization
if((${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR
${xpu_srcs_len} GREATER 0) AND ${common_srcs_len} GREATER 0)
# If common_srcs depends on device-specific srcs, build the target using this rule.
if (WITH_GPU)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
nv_library(${TARGET}_part SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET} SRCS ${common_srcs} DEPS ${TARGET}_part)
endif()
elseif (WITH_ROCM)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
hip_library(${TARGET}_part SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET} SRCS ${common_srcs} DEPS ${TARGET}_part)
endif()
else()
if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
cc_library(${TARGET}_part SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
cc_library(${TARGET} SRCS ${common_srcs} DEPS ${TARGET}_part)
endif()
endif()
elseif (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
nv_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
elseif (WITH_ROCM)
hip_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
hip_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
else()
cc_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
cc_library(${TARGET} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
endif()
else()
# If the kernel has a header file declaration, but no corresponding
# implementation can be found, this is not allowed
if (${cpu_srcs_len} EQUAL 0 AND ${gpu_srcs_len} EQUAL 0 AND
${xpu_srcs_len} EQUAL 0)
message(FATAL_ERROR "Cannot find any implementation for ${TARGET}")
if (${common_srcs_len} EQUAL 0)
message(FATAL_ERROR "Cannot find any implementation for ${TARGET}")
else()
# If the kernel has a device-independent public implementation,
# we will use it and will not adopt the
# device-specific implementations
if (WITH_GPU)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
nv_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
nv_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
hip_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
hip_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
else()
if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
cc_library(${TARGET} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
cc_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
endif()
endif()
endif()

if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR
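For orientation, a sketch of a call site for this function; the target name, source layout, and DEPS are illustrative assumptions, not taken from this commit:

# Hypothetical kernel declaration. The src-collection logic is elided in the
# hunk above, but the variable names suggest kernel_library gathers a common
# scale_kernel.cc plus device-specific cpu/gpu/xpu sources next to the call
# site, then picks cc_library/nv_library/hip_library per the branches shown.
# DEPS are forwarded to the chosen library rule.
kernel_library(scale_kernel DEPS dense_tensor kernel_context)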
33 changes: 30 additions & 3 deletions paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -37,6 +37,8 @@ std::unordered_map<std::string, std::vector<std::string>>
core_ops_returns_info = {};
std::unordered_map<std::string, std::vector<std::string>> core_ops_args_info =
{};
std::unordered_map<std::string, std::vector<std::string>>
core_ops_args_type_info = {};

/* --- Static maps to handle corner cases --- */
static std::unordered_map<std::string, paddle::framework::AttributeMap>
@@ -1225,10 +1227,16 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
*/
VLOG(6) << "Generating Dygraph Forward Function";

std::string generated_function_body = "";
const char* FORWARD_FUNCTION_TEMPLATE =
" VLOG(3) << \"Running Eager Forward Op: %s\";\n";
std::string generated_function_body =
paddle::string::Sprintf(FORWARD_FUNCTION_TEMPLATE, op_type);

std::string dygraph_function_args_str = "";
core_ops_args_info[op_type] = {};
core_ops_args_type_info[op_type] = {};
core_ops_args_info[op_type].resize(in_vars.size());
core_ops_args_type_info[op_type].resize(in_vars.size());

/* ------ Dygraph forward function generation ------ */
generated_function_body += " // Dygraph Forward Pass\n";
@@ -1246,10 +1254,14 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
"const std::vector<egr::EagerTensor>& %s";
input_args_str_list[input_position] =
paddle::string::Sprintf(FWD_INS_ARG_TEMPLATE, input_name);

core_ops_args_type_info[op_type][input_position] = "list";
} else {
const char* FWD_INS_ARG_TEMPLATE = "const egr::EagerTensor& %s";
input_args_str_list[input_position] =
paddle::string::Sprintf(FWD_INS_ARG_TEMPLATE, input_name);

core_ops_args_type_info[op_type][input_position] = "tensor";
}
core_ops_args_info[op_type][input_position] = input_name;

@@ -1318,11 +1330,14 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, output_var_name);
dygraph_function_args_str += arg_str;

core_ops_args_type_info[op_type].push_back("list");
} else {
const char* FWD_NUM_ARG_TEMPLATE = ", egr::EagerTensor* %s";
std::string arg_str =
paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, output_var_name);
dygraph_function_args_str += arg_str;

core_ops_args_type_info[op_type].push_back("tensor");
}
const char* FWD_OUTS_CONTENT_TEMPLATE =
"{ \"%s\", egr::EagerUtils::TrySyncToVars(%s) },";
@@ -1344,6 +1359,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
outs_contents_str += paddle::string::Sprintf(FWD_OUTS_CONTENT_TEMPLATE,
output_name, outnum);
core_ops_args_info[op_type].push_back(outnum);
core_ops_args_type_info[op_type].push_back("int");
} else {
const char* FWD_OUTS_CONTENT_TEMPLATE =
"{ \"%s\", "
@@ -1811,6 +1827,11 @@ static std::string GenerateGradNodeCCContents(
}
*/

const char* EAGER_LOG_TEMPLATE =
" VLOG(3) << \"Running Eager Backward Node: GradNode%s\";\n";
std::string generated_grad_function_body =
paddle::string::Sprintf(EAGER_LOG_TEMPLATE, fwd_op_type);

// This is a Copy
auto op_base_infos = bwd_info.GetOpBaseInfos();

@@ -1829,7 +1850,6 @@ static std::string GenerateGradNodeCCContents(
op_base_infos.emplace_back(std::move(op_base_info));
}

std::string generated_grad_function_body = "";
size_t outs_size = 0;
for (size_t i = 0; i < op_base_infos.size(); i++) {
const auto& op_base_info = op_base_infos[i];
@@ -2030,6 +2050,9 @@ static std::string GenerateDygraphHFileIncludes() {
dygraph_forward_api_includes_str +=
"extern std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_args_info;\n";
dygraph_forward_api_includes_str +=
"extern std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_args_type_info;\n";
dygraph_forward_api_includes_str +=
"extern std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_returns_info;\n\n";
@@ -2126,16 +2149,20 @@ static std::string GenerateCoreOpsReturnsInfo() {
"std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_args_info = { %s };\n"
"std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_args_type_info = { %s };\n"
"std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_returns_info = { %s };\n";

std::string core_ops_args_info_init_str =
ConvertCoreOpsInfosToString(core_ops_args_info);
std::string core_ops_args_type_info_init_str =
ConvertCoreOpsInfosToString(core_ops_args_type_info);
std::string core_ops_returns_info_init_str =
ConvertCoreOpsInfosToString(core_ops_returns_info);

std::string core_ops_info_str = paddle::string::Sprintf(
Core_Ops_Returns_MAP_TEMPLATE, core_ops_args_info_init_str,
core_ops_returns_info_init_str);
core_ops_args_type_info_init_str, core_ops_returns_info_init_str);

return core_ops_info_str;
}
23 changes: 18 additions & 5 deletions paddle/fluid/eager/legacy/infer_shape_context.h
@@ -222,17 +222,30 @@ class EagerInferShapeContext : public paddle::framework::InferShapeContext {
paddle::framework::DataLayout::kMKLDNN));
}

// TODO(paddle-dev): Can this be a template?
std::vector<paddle::framework::InferShapeVarPtr> GetInputVarPtrs(
const std::string& name) const override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"GetInputVarPtrs not support in dygraph runtime context"));
std::vector<paddle::framework::InferShapeVarPtr> res;
auto it = tensor_in_->find(name);
PADDLE_ENFORCE_NE(it, tensor_in_->end(),
paddle::platform::errors::NotFound(
"Can not find [%s] in inputs.", name));
for (auto& tensor : it->second) {
res.emplace_back(tensor->MutableVar());
}
return res;
}

std::vector<paddle::framework::InferShapeVarPtr> GetOutputVarPtrs(
const std::string& name) const override {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"GetOutputVarPtrs not support in dygraph runtime context"));
std::vector<paddle::framework::InferShapeVarPtr> res;
auto it = tensor_out_->find(name);
PADDLE_ENFORCE_NE(it, tensor_out_->end(),
paddle::platform::errors::NotFound(
"Can not find [%s] in outputs.", name));
for (auto& tensor : it->second) {
res.emplace_back(tensor->MutableVar());
}
return res;
}

DDim GetInputDim(const std::string& name) const override {
6 changes: 4 additions & 2 deletions paddle/fluid/framework/CMakeLists.txt
@@ -167,6 +167,7 @@ cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor
framework_proto selected_rows_utils data_device_transform data_type_transform data_layout_transform)

cc_library(attribute SRCS attribute.cc DEPS framework_proto boost enforce)
cc_test(attribute_test SRCS attribute_test.cc DEPS attribute framework_proto proto_desc)
cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc
device_context)

@@ -191,11 +192,11 @@ cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_va
IF(WITH_XPU)
cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils
pten pten_utils kernel_factory)
pten pten_utils kernel_factory infershape_utils)
ELSE()
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils
pten pten_utils kernel_factory)
pten pten_utils kernel_factory infershape_utils)
ENDIF()

cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)
@@ -407,6 +408,7 @@ cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tens
cc_library(generator SRCS generator.cc DEPS enforce place)

cc_library(pten_utils SRCS pten_utils.cc DEPS lod_tensor selected_rows_utils place pten var_type_traits pten_api_utils op_info)
cc_library(infershape_utils SRCS infershape_utils.cc DEPS lod_tensor selected_rows_utils attribute place pten var_type_traits pten pten_api_utils op_info shape_inference)

# Get the current working branch
execute_process(
33 changes: 33 additions & 0 deletions paddle/fluid/framework/attribute.cc
@@ -17,6 +17,39 @@ limitations under the License. */
namespace paddle {
namespace framework {

paddle::any GetAttrValue(const Attribute& attr) {
if (attr.type() == typeid(int)) {
return paddle::any(BOOST_GET_CONST(int, attr));
} else if (attr.type() == typeid(float)) {
return paddle::any(BOOST_GET_CONST(float, attr));
} else if (attr.type() == typeid(std::string)) {
return paddle::any(BOOST_GET_CONST(std::string, attr));
} else if (attr.type() == typeid(std::vector<int>)) {
return paddle::any(BOOST_GET_CONST(std::vector<int>, attr));
} else if (attr.type() == typeid(std::vector<float>)) {
return paddle::any(BOOST_GET_CONST(std::vector<float>, attr));
} else if (attr.type() == typeid(std::vector<std::string>)) {
return paddle::any(BOOST_GET_CONST(std::vector<std::string>, attr));
} else if (attr.type() == typeid(bool)) {
return paddle::any(BOOST_GET_CONST(bool, attr));
} else if (attr.type() == typeid(std::vector<bool>)) {
return paddle::any(BOOST_GET_CONST(std::vector<bool>, attr));
} else if (attr.type() == typeid(BlockDesc*)) {
return paddle::any(BOOST_GET_CONST(BlockDesc*, attr));
} else if (attr.type() == typeid(int64_t)) {
return paddle::any(BOOST_GET_CONST(int64_t, attr));
} else if (attr.type() == typeid(std::vector<BlockDesc*>)) {
return paddle::any(BOOST_GET_CONST(std::vector<BlockDesc*>, attr));
} else if (attr.type() == typeid(std::vector<int64_t>)) {
return paddle::any(BOOST_GET_CONST(std::vector<int64_t>, attr));
} else if (attr.type() == typeid(std::vector<double>)) {
return paddle::any(BOOST_GET_CONST(std::vector<double>, attr));
} else {
PADDLE_THROW(
platform::errors::Unimplemented("Unsupported Attribute value type."));
}
}

Attribute GetAttrValue(const proto::OpDesc::Attr& attr_desc) {
switch (attr_desc.type()) {
case proto::AttrType::BOOLEAN: {