20230321 #1

Merged
merged 27 commits on Mar 21, 2023
Changes from all commits
27 commits
7581ef9
[XPU] add fp16 support for compare ops. (#51846)
houj04 Mar 21, 2023
85b2fa4
Fix relu custom vjp (#51838)
JiabinYang Mar 21, 2023
9c238d2
fix softmaxce null point in shape test (#51850)
Difers Mar 21, 2023
01eeba5
[AMP OP&Test] Support fp16/bf16 for cumsum (#51694)
DesmonDay Mar 21, 2023
c31ffbe
【Hackathon No.31】 Fix Null pointer bug for Case4:paddle.incubate.grap…
zeroRains Mar 21, 2023
cdefcd0
[Zero-Dim] Support output 0D for argmin/argmax/median/kthvalue/mode/e…
zhwesky2010 Mar 21, 2023
0bb7c00
[Auto Parallel] Add patterns of rule based tuner (#51859)
Caozhou1995 Mar 21, 2023
1816ceb
[Unify headerfile] Inference headerfile includes paddle/extension.h (…
jiahy0825 Mar 21, 2023
325feca
Fix compile error in cublaslt (#51793)
ZzSean Mar 21, 2023
e0eb5cf
[OPT]Set order for hybridparallel setting (#51781)
ForFishes Mar 21, 2023
f3ef748
[CodeStyle][UP005] remove deprecated unittest alias (#51834)
Ainavo Mar 21, 2023
f82da79
[CodeStyle][C400] replace unnecessary generator list (#51839)
Ainavo Mar 21, 2023
4640f4b
[OPT] FlashAttention && ModelParallel (#51617)
ForFishes Mar 21, 2023
6824272
[Expose Phi Headers] Expose kernel header files of phi to site-packag…
jiahy0825 Mar 21, 2023
4638a62
[PHI decoupling] Move DataType* from paddle:experimental to phi names…
jinyouzhi Mar 21, 2023
cb1d6b5
fix mutable of custom place (#51710)
engineer1109 Mar 21, 2023
bef4e9f
[Prim Op Test]add public_python_api in prim test (#51829)
Charles-hit Mar 21, 2023
f47a5f7
[prim] simplify batch_norm composite rule (#51827)
cyber-pioneer Mar 21, 2023
e35afed
[Paddle-TRT] fix GN when params.c% params.cPerBlock != 0 (#51836)
zhoutianzi666 Mar 21, 2023
d2122c6
[CustomOP Inplace] Add customOP inplace check (#51844)
jiahy0825 Mar 21, 2023
88ad79d
【prim】sin and cos composite grad (#51748)
xiaoguoguo626807 Mar 21, 2023
457b9fb
fix build_ci_problem (#51910)
risemeup1 Mar 21, 2023
72c711b
【fluid clean】remove fluid.data (#50699)
GGBond8488 Mar 21, 2023
66ac259
fix bug of sync buffer (#51895)
ForFishes Mar 21, 2023
4dbf3a8
[Fix typo] Fix typo error in grad_node_info.h (#51607)
jiahy0825 Mar 21, 2023
cdc5896
remove unnecessary generator set and dict (#51845)
Ainavo Mar 21, 2023
16abb32
move split/reshape prim api to auto generated file
cxxly Mar 14, 2023
1 change: 1 addition & 0 deletions cmake/cblas.cmake
@@ -96,6 +96,7 @@ if(NOT DEFINED CBLAS_PROVIDER)
STATUS
"Found OpenBLAS (include: ${OPENBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})"
)

message(
STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})"
)
3 changes: 3 additions & 0 deletions cmake/phi_header.cmake
@@ -51,3 +51,6 @@ phi_header_path_compat(
file(RENAME
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/extension.h
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/ext_all.h)
# Included header file of training and inference can be unified as single file: paddle/extension.h
file(COPY ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/ext_all.h
DESTINATION ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/extension.h)
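The comment in the hunk above states the goal: after this change, one include of paddle/extension.h covers both training and inference builds. A minimal custom-operator sketch is given below to illustrate that include path; the op name and pass-through kernel are hypothetical and not part of this PR, while PD_BUILD_OP and PD_KERNEL are Paddle's existing custom-op registration macros.

#include "paddle/extension.h"  // single unified header after this change

// Hypothetical pass-through kernel; only the include and the registration
// are the point of this sketch.
std::vector<paddle::Tensor> IdentityForward(const paddle::Tensor& x) {
  return {x};
}

PD_BUILD_OP(custom_identity)
    .Inputs({"X"})
    .Outputs({"Out"})
    .SetKernelFn(PD_KERNEL(IdentityForward));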
12 changes: 6 additions & 6 deletions paddle/fluid/distributed/collective/process_group_custom.cc
@@ -219,20 +219,20 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupCustom::Collective(

void* XcclGetPointerByOffset(void* raw_pointer,
size_t offset,
experimental::DataType type) {
if (type == experimental::DataType::FLOAT32) {
phi::DataType type) {
if (type == phi::DataType::FLOAT32) {
return reinterpret_cast<void*>(reinterpret_cast<float*>(raw_pointer) +
offset);
} else if (type == experimental::DataType::FLOAT64) {
} else if (type == phi::DataType::FLOAT64) {
return reinterpret_cast<void*>(reinterpret_cast<double*>(raw_pointer) +
offset);
} else if (type == experimental::DataType::INT32) {
} else if (type == phi::DataType::INT32) {
return reinterpret_cast<void*>(reinterpret_cast<int32_t*>(raw_pointer) +
offset);
} else if (type == experimental::DataType::INT64) {
} else if (type == phi::DataType::INT64) {
return reinterpret_cast<void*>(reinterpret_cast<int64_t*>(raw_pointer) +
offset);
} else if (type == experimental::DataType::FLOAT16) {
} else if (type == phi::DataType::FLOAT16) {
return reinterpret_cast<void*>(reinterpret_cast<int16_t*>(raw_pointer) +
offset);
} else {
32 changes: 16 additions & 16 deletions paddle/fluid/distributed/collective/process_group_gloo.cc
@@ -39,19 +39,19 @@ namespace distributed {
#ifdef _WIN32
#define GENERATE_FUNC(type, func, ...) \
switch (type) { \
case experimental::DataType::FLOAT32: \
case phi::DataType::FLOAT32: \
func<float>(__VA_ARGS__); \
break; \
case experimental::DataType::FLOAT64: \
case phi::DataType::FLOAT64: \
func<double>(__VA_ARGS__); \
break; \
case experimental::DataType::FLOAT16: \
case phi::DataType::FLOAT16: \
func<gloo::float16>(__VA_ARGS__); \
break; \
case experimental::DataType::INT32: \
case phi::DataType::INT32: \
func<int32_t>(__VA_ARGS__); \
break; \
case experimental::DataType::INT64: \
case phi::DataType::INT64: \
func<int64_t>(__VA_ARGS__); \
break; \
default: \
@@ -64,31 +64,31 @@ namespace distributed {
#else
#define GENERATE_FUNC(type, func, args...) \
switch (type) { \
case experimental::DataType::FLOAT32: \
case phi::DataType::FLOAT32: \
func<float>(args); \
break; \
case experimental::DataType::FLOAT64: \
case phi::DataType::FLOAT64: \
func<double>(args); \
break; \
case experimental::DataType::FLOAT16: \
case phi::DataType::FLOAT16: \
func<gloo::float16>(args); \
break; \
case experimental::DataType::INT32: \
case phi::DataType::INT32: \
func<int32_t>(args); \
break; \
case experimental::DataType::INT64: \
case phi::DataType::INT64: \
func<int64_t>(args); \
break; \
case experimental::DataType::INT8: \
case phi::DataType::INT8: \
func<int8_t>(args); \
break; \
case experimental::DataType::UINT8: \
case phi::DataType::UINT8: \
func<uint8_t>(args); \
break; \
case experimental::DataType::BOOL: \
case phi::DataType::BOOL: \
func<bool>(args); \
break; \
case experimental::DataType::BFLOAT16: \
case phi::DataType::BFLOAT16: \
func<bfloat16>(args); \
break; \
default: \
@@ -285,7 +285,7 @@ class AllreduceGlooTask : public ProcessGroupGloo::GlooTask {
const ReduceOp _reduce_op;
uint32_t _tag;

gloo::AllreduceOptions::Func _get_function(const experimental::DataType type,
gloo::AllreduceOptions::Func _get_function(const phi::DataType type,
const ReduceOp op) {
gloo::AllreduceOptions::Func fn;
GENERATE_FUNC(type, _get_function_impl, fn, op);
@@ -457,7 +457,7 @@ class ReduceGlooTask : public ProcessGroupGloo::GlooTask {
int _dst;
uint32_t _tag;

gloo::ReduceOptions::Func _get_function(const experimental::DataType type,
gloo::ReduceOptions::Func _get_function(const phi::DataType type,
const ReduceOp op) {
gloo::ReduceOptions::Func fn;
GENERATE_FUNC(type, _get_function_impl, fn, op);
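For context, GENERATE_FUNC dispatches a runtime phi::DataType onto a templated callable. The sketch below is not part of this PR and assumes it lives in the same translation unit as the macro, so the gloo::float16 and bfloat16 aliases are visible; it simply recovers the element width for a dtype.

// Sketch only: map a runtime dtype to sizeof(T) through the GENERATE_FUNC switch.
template <typename T>
void ElementSize(size_t* out) {
  *out = sizeof(T);
}

size_t ElementSizeOf(phi::DataType type) {
  size_t out = 0;
  GENERATE_FUNC(type, ElementSize, &out);  // e.g. ElementSize<float>(&out) for FLOAT32
  return out;
}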
22 changes: 10 additions & 12 deletions paddle/fluid/distributed/collective/process_group_nccl.cc
@@ -993,34 +993,32 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupNCCL::AllGather(
CommType::ALLGATHER);
}

void* GetPointerByOffset(void* raw_pointer,
size_t offset,
experimental::DataType type) {
if (type == experimental::DataType::FLOAT32) {
void* GetPointerByOffset(void* raw_pointer, size_t offset, phi::DataType type) {
if (type == phi::DataType::FLOAT32) {
return reinterpret_cast<void*>(reinterpret_cast<float*>(raw_pointer) +
offset);
} else if (type == experimental::DataType::FLOAT64) {
} else if (type == phi::DataType::FLOAT64) {
return reinterpret_cast<void*>(reinterpret_cast<double*>(raw_pointer) +
offset);
} else if (type == experimental::DataType::FLOAT16) {
} else if (type == phi::DataType::FLOAT16) {
return reinterpret_cast<void*>(reinterpret_cast<int16_t*>(raw_pointer) +
offset);
} else if (type == experimental::DataType::INT32) {
} else if (type == phi::DataType::INT32) {
return reinterpret_cast<void*>(reinterpret_cast<int32_t*>(raw_pointer) +
offset);
} else if (type == experimental::DataType::INT64) {
} else if (type == phi::DataType::INT64) {
return reinterpret_cast<void*>(reinterpret_cast<int64_t*>(raw_pointer) +
offset);
} else if (type == experimental::DataType::INT8) {
} else if (type == phi::DataType::INT8) {
return reinterpret_cast<void*>(reinterpret_cast<int8_t*>(raw_pointer) +
offset);
} else if (type == experimental::DataType::UINT8) {
} else if (type == phi::DataType::UINT8) {
return reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(raw_pointer) +
offset);
} else if (type == experimental::DataType::BOOL) {
} else if (type == phi::DataType::BOOL) {
return reinterpret_cast<void*>(reinterpret_cast<bool*>(raw_pointer) +
offset);
} else if (type == experimental::DataType::BFLOAT16) {
} else if (type == phi::DataType::BFLOAT16) {
return reinterpret_cast<void*>(reinterpret_cast<uint16_t*>(raw_pointer) +
offset);
} else {
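Every branch of GetPointerByOffset advances the pointer by offset elements of that branch's type, i.e. offset * sizeof(T) bytes. As a side note, not code from this PR, the same arithmetic can be written generically with phi::SizeOf (the helper reducer.cc below already uses), assuming the dtype has a fixed byte width.

// Equivalent generic form (sketch, assumes a fixed-width dtype):
void* GetPointerByOffsetGeneric(void* raw_pointer,
                                size_t offset,
                                phi::DataType type) {
  return static_cast<void*>(static_cast<char*>(raw_pointer) +
                            offset * phi::SizeOf(type));
}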
7 changes: 3 additions & 4 deletions paddle/fluid/distributed/collective/reducer.cc
@@ -77,12 +77,11 @@ std::vector<std::vector<size_t>> Eager_AssignGroupBySize(

// Key: the var type
// Value: should use which index in group_size_limits for group size limit
std::map<experimental::DataType, size_t> group_limit_index;
std::map<phi::DataType, size_t> group_limit_index;

// Key: the var type
// Value: <the var index in input tensors, total numel in this group>
std::map<experimental::DataType, std::pair<std::vector<size_t>, size_t>>
next_group;
std::map<phi::DataType, std::pair<std::vector<size_t>, size_t>> next_group;

for (size_t i = 0; i < tensors.size(); ++i) {
const auto &var = tensors[i];
@@ -114,7 +113,7 @@ std::vector<std::vector<size_t>> Eager_AssignGroupBySize(
}

group_info.first.push_back(tensor_real_index);
group_info.second += experimental::SizeOf(var_dtype) * var_size;
group_info.second += phi::SizeOf(var_dtype) * var_size;
// group_info.second += framework::SizeOfType(var_dtype) * var_size;

if (group_limit_index.find(var_dtype) == group_limit_index.end()) {
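The two maps above bucket tensors per dtype: one remembers which group_size_limits index applies to a dtype, the other tracks, for the pending group of that dtype, the member indices and the running byte total computed with phi::SizeOf. A stripped-down sketch of that bookkeeping, with hypothetical names and assuming phi::DataType and phi::SizeOf are in scope, looks like this:

#include <cstddef>
#include <map>
#include <utility>
#include <vector>

// Hypothetical helper mirroring the accounting above: record a tensor in its
// dtype's pending group and grow the group's byte total.
void AddToGroup(
    std::map<phi::DataType, std::pair<std::vector<size_t>, size_t>>* next_group,
    size_t tensor_index, phi::DataType dtype, size_t numel) {
  auto& group_info = (*next_group)[dtype];
  group_info.first.push_back(tensor_index);         // the var index in input tensors
  group_info.second += phi::SizeOf(dtype) * numel;  // total bytes in this group
}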
21 changes: 10 additions & 11 deletions paddle/fluid/eager/amp_auto_cast.h
@@ -21,7 +21,7 @@
namespace egr {

static inline bool NeedCast(const paddle::Tensor& tensor,
const paddle::experimental::DataType& dst_dtype) {
const phi::DataType& dst_dtype) {
auto place = tensor.place();
auto data_type = tensor.dtype();
if (paddle::platform::is_gpu_place(place) ||
@@ -32,9 +32,9 @@ static inline bool NeedCast(const paddle::Tensor& tensor,
paddle::platform::is_npu_pinned_place(place) ||
paddle::platform::is_custom_place(place)) {
// CudaPinndePlace is added for varbase created by dataloader
if ((data_type == paddle::experimental::DataType::FLOAT32 ||
data_type == paddle::experimental::DataType::FLOAT16 ||
data_type == paddle::experimental::DataType::BFLOAT16) &&
if ((data_type == phi::DataType::FLOAT32 ||
data_type == phi::DataType::FLOAT16 ||
data_type == phi::DataType::BFLOAT16) &&
(data_type != dst_dtype)) {
return true;
}
@@ -45,7 +45,7 @@ static inline bool NeedCast(const paddle::Tensor& tensor,
inline std::vector<paddle::Tensor> AmpAutoCasts(
const std::string& inputs_name,
const std::vector<paddle::Tensor>& inputs,
const paddle::experimental::DataType& dst_dtype,
const phi::DataType& dst_dtype,
std::string op_name) {
VLOG(6) << "AMP AmpAutoCasts:"
<< " inputs(" << inputs_name << ") dst_dtype("
@@ -65,15 +65,14 @@ inline std::vector<paddle::Tensor> AmpAutoCasts(
return inputs_casted;
}

inline paddle::Tensor AmpAutoCast(
const std::string& input_name,
const paddle::Tensor& input,
const paddle::experimental::DataType& dst_dtype,
std::string op_name) {
inline paddle::Tensor AmpAutoCast(const std::string& input_name,
const paddle::Tensor& input,
const phi::DataType& dst_dtype,
std::string op_name) {
VLOG(6) << "AMP AmpAutoCasts:"
<< " input(" << input_name << ") dst_dtype("
<< phi::DataTypeToString(dst_dtype) << ").";
if (dst_dtype == paddle::experimental::DataType::FLOAT16) {
if (dst_dtype == phi::DataType::FLOAT16) {
if (op_name == "run_program") {
return input;
}
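After the rename, callers hand AmpAutoCast a plain phi::DataType. A hypothetical call site, not taken from this PR, would read:

// Sketch: cast one input to the AMP destination dtype before dispatching a
// matmul-like op (tensor name, dtype and op name are assumptions).
paddle::Tensor CastForMatmul(const paddle::Tensor& x) {
  return egr::AmpAutoCast("X", x, phi::DataType::FLOAT16, "matmul_v2");
}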
44 changes: 20 additions & 24 deletions paddle/fluid/eager/amp_utils.h
@@ -20,44 +20,41 @@

namespace egr {

static inline paddle::experimental::DataType GetPromoteType(
static inline phi::DataType GetPromoteType(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& amp_tensors_vector,
const paddle::experimental::DataType& amp_dtype) {
const phi::DataType& amp_dtype) {
auto dst_type = amp_dtype;
if (egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype() ==
"float16") {
if (op_name == "batch_norm" || op_name == "layer_norm" ||
op_name == "sync_batch_norm") {
if (amp_tensors_vector[0][0].dtype() ==
paddle::experimental::DataType::FLOAT32) {
dst_type = paddle::experimental::DataType::FLOAT32;
if (amp_tensors_vector[0][0].dtype() == phi::DataType::FLOAT32) {
dst_type = phi::DataType::FLOAT32;
}
} else if (op_name == "fused_attention") {
for (size_t i = 0; i < amp_tensors_vector.size(); i++) {
if (i != 3 || i != 4 || i != 9 || i != 10) {
if (amp_tensors_vector[i][0].dtype() ==
paddle::experimental::DataType::FLOAT32) {
dst_type = paddle::experimental::DataType::FLOAT32;
if (amp_tensors_vector[i][0].dtype() == phi::DataType::FLOAT32) {
dst_type = phi::DataType::FLOAT32;
break;
}
}
}
} else if (op_name == "fused_feedforward") {
for (size_t i = 0; i < amp_tensors_vector.size(); i++) {
if (i != 7 || i != 8 || i != 9 || i != 10) {
if (amp_tensors_vector[i][0].dtype() ==
paddle::experimental::DataType::FLOAT32) {
dst_type = paddle::experimental::DataType::FLOAT32;
if (amp_tensors_vector[i][0].dtype() == phi::DataType::FLOAT32) {
dst_type = phi::DataType::FLOAT32;
break;
}
}
}
} else {
for (const auto& tensors : amp_tensors_vector) {
for (const auto& tensor : tensors) {
if (tensor.dtype() == paddle::experimental::DataType::FLOAT32) {
if (tensor.dtype() == phi::DataType::FLOAT32) {
dst_type = tensor.dtype();
break;
}
@@ -67,7 +64,7 @@ static inline paddle::experimental::DataType GetPromoteType(
} else {
for (const auto& tensors : amp_tensors_vector) {
for (const auto& tensor : tensors) {
if (tensor.dtype() == paddle::experimental::DataType::FLOAT32) {
if (tensor.dtype() == phi::DataType::FLOAT32) {
dst_type = tensor.dtype();
break;
}
@@ -77,20 +74,19 @@ static inline paddle::experimental::DataType GetPromoteType(
// NOTE(juncai): moving_average_abs_max_scale only consider the dtype of
// input(X)
if (op_name == "moving_average_abs_max_scale") {
if (amp_tensors_vector[0][0].dtype() ==
paddle::experimental::DataType::FLOAT16) {
dst_type = paddle::experimental::DataType::FLOAT16;
if (amp_tensors_vector[0][0].dtype() == phi::DataType::FLOAT16) {
dst_type = phi::DataType::FLOAT16;
}
}
return dst_type;
}

inline paddle::experimental::DataType GetDtypeWithPlace(
inline phi::DataType GetDtypeWithPlace(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& amp_tensors_vector,
const paddle::experimental::DataType amp_dtype) {
if (amp_dtype == paddle::experimental::DataType::FLOAT32) {
const phi::DataType amp_dtype) {
if (amp_dtype == phi::DataType::FLOAT32) {
return amp_dtype;
}
bool is_right_place = false;
@@ -113,12 +109,12 @@ inline paddle::experimental::DataType GetDtypeWithPlace(
if (!is_right_place) {
VLOG(6) << "Change " << op_name << "'s AMP type from " << amp_dtype
<< " to FP32";
return paddle::experimental::DataType::FLOAT32;
return phi::DataType::FLOAT32;
}
return amp_dtype;
}

inline paddle::experimental::DataType GetAmpDestDtype(
inline phi::DataType GetAmpDestDtype(
const std::string& op_name,
const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& amp_tensors_vector) {
@@ -134,23 +130,23 @@ inline paddle::experimental::DataType GetAmpDestDtype(
} else if (paddle::imperative::AmpOperators::Instance()
.GetMutableBlockOps()
->count(op_name)) {
dst_type = paddle::experimental::DataType::FLOAT32;
dst_type = phi::DataType::FLOAT32;
} else {
dst_type = GetPromoteType(op_name, amp_tensors_vector, amp_setting_dtype);
}
} else if (amp_level == paddle::imperative::AmpLevel::O2) {
if (paddle::imperative::AmpOperators::Instance()
.GetMutableBlockOps()
->count(op_name)) {
dst_type = paddle::experimental::DataType::FLOAT32;
dst_type = phi::DataType::FLOAT32;
}
}

if (dst_type == amp_setting_dtype &&
(paddle::imperative::AmpOperators::Instance()
.GetMutableUnsupportedOps(amp_setting_dtype)
->count(op_name))) {
dst_type = paddle::experimental::DataType::FLOAT32;
dst_type = phi::DataType::FLOAT32;
}

dst_type = GetDtypeWithPlace(op_name, amp_tensors_vector, dst_type);
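Taken together, GetAmpDestDtype now returns a phi::DataType chosen from the op's allow/block lists, the promote rule, and the tensor places. A rough call-site sketch follows; the function name, the op name, and the egr::kSlotSmallVectorSize qualification are assumptions rather than code from this PR.

// Sketch: collect the op inputs per slot and ask for the AMP destination dtype.
phi::DataType DestDtypeForBinaryOp(const paddle::Tensor& x,
                                   const paddle::Tensor& y) {
  paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
      amp_tensors_vector = {{x}, {y}};
  return egr::GetAmpDestDtype("matmul_v2", amp_tensors_vector);
}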
@@ -91,7 +91,7 @@
'bool[]': 'std::vector<bool>',
'Place': 'paddle::Place',
'DataLayout': 'phi::DataLayout',
'DataType': 'paddle::experimental::DataType',
'DataType': 'phi::DataType',
'int64_t[]': 'std::vector<int64_t>',
'int[]': 'std::vector<int>',
'Tensor': 'Tensor',
@@ -293,8 +293,10 @@ def ParseYamlArgs(string):
assert (
arg_type in yaml_types_mapping.keys()
), f"The argument type {arg_type} in yaml config is not supported in yaml_types_mapping."
if arg_type in ["DataType", "DataLayout"] and default_value is not None:
if arg_type in ["DataLayout"] and default_value is not None:
default_value = f"paddle::experimental::{default_value}"
if arg_type in ["DataType"] and default_value is not None:
default_value = f"phi::{default_value}"
arg_type = yaml_types_mapping[arg_type]

arg_name = RemoveSpecialSymbolsInName(arg_name)
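The generator change above keeps DataLayout defaults under paddle::experimental:: while DataType defaults are now prefixed with phi::. Illustratively, a yaml argument written as DataType dtype = DataType::FLOAT32 would surface in a generated forward declaration roughly as below; the function name and the rest of the signature are assumptions about the shape of the generated C++, not output copied from the generator.

// Assumed shape of generated code after this change (illustration only):
paddle::Tensor some_generated_ad_func(
    const paddle::Tensor& x,
    phi::DataType dtype = phi::DataType::FLOAT32);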