
Commit f956aee
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into clear_add_index_put_api
Courtesy-Xs committed May 9, 2023
2 parents ed00d81 + 9cd0a5b commit f956aee
Showing 547 changed files with 11,084 additions and 4,595 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -75,7 +75,8 @@ tools/nvcc_lazy
# TODO(zhiqiang) Move this file to build directory.
paddle/fluid/pybind/eager_op_function.cc
tools/nvcc_lazy

paddle/phi/kernels/sparse/gpu/cutlass_generator/all_gemm_operations.h
paddle/phi/kernels/sparse/gpu/cutlass_generator/configurations.h

# these files (directories) are generated before build system generation
paddle/fluid/operators/generated_op*.cc
5 changes: 5 additions & 0 deletions CMakeLists.txt
@@ -92,6 +92,11 @@ message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
"${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
message(STATUS "AR tools: ${CMAKE_AR}")

if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") AND CMAKE_CXX_COMPILER_VERSION
VERSION_GREATER 10.4)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=uninitialized")
endif()

# MUSL build turn off warnings
if(WITH_MUSL)
set(CMAKE_CXX_FLAGS
31 changes: 24 additions & 7 deletions cmake/external/cutlass.cmake
@@ -42,20 +42,37 @@ ExternalProject_Add(
INSTALL_COMMAND ""
TEST_COMMAND "")

set(tmp_gemm_operations_file
${CMAKE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/cutlass_generator/generated/gemm/all_gemm_operations.h.tmp
)
set(tmp_configurations_file
${CMAKE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/cutlass_generator/generated/gemm/configurations.h.tmp
)
set(gemm_operations_file
${CMAKE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/cutlass_generator/all_gemm_operations.h
)
set(configurations_file
${CMAKE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/cutlass_generator/configurations.h
)

add_custom_target(
cutlass_codegen
COMMAND
rm -rf
${CMAKE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/cutlass_generator/build
COMMAND
mkdir -p
${CMAKE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/cutlass_generator/build/generated/gemm
COMMAND
${PYTHON_EXECUTABLE} -B
${CMAKE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_generator.py
"${THIRD_PARTY_PATH}/cutlass/src/extern_cutlass/tools/library/scripts/"
"${CMAKE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/cutlass_generator/build"
"${CMAKE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/cutlass_generator"
"${CMAKE_CUDA_COMPILER_VERSION}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_gemm_operations_file}
${gemm_operations_file}
COMMAND
${CMAKE_COMMAND} -E echo
"copy_if_different ${tmp_gemm_operations_file} to ${gemm_operations_file}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_configurations_file}
${configurations_file}
COMMAND
${CMAKE_COMMAND} -E echo
"copy_if_different ${tmp_configurations_file} to ${configurations_file}"
VERBATIM)

add_library(cutlass INTERFACE)
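The reworked codegen target regenerates the CUTLASS gather/scatter GEMM headers into a build directory, then copies them over the destination headers only when their contents actually changed, so targets that include them are not recompiled after a no-op regeneration. A minimal C++ sketch of the idea behind cmake -E copy_if_different, under the assumption that a byte-wise comparison is sufficient (names and paths here are illustrative, not Paddle's):

#include <filesystem>
#include <fstream>
#include <sstream>
#include <string>

namespace fs = std::filesystem;

// Read a whole file into memory for a byte-wise comparison.
static std::string ReadAll(const fs::path& p) {
  std::ifstream in(p, std::ios::binary);
  std::ostringstream buf;
  buf << in.rdbuf();
  return buf.str();
}

// Overwrite dst only when src differs; otherwise dst's timestamp is left
// untouched, which is what keeps dependent targets from rebuilding.
void CopyIfDifferent(const fs::path& src, const fs::path& dst) {
  if (!fs::exists(dst) || ReadAll(src) != ReadAll(dst)) {
    fs::copy_file(src, dst, fs::copy_options::overwrite_existing);
  }
}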
@@ -189,7 +189,7 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupCustom::Collective(
std::vector<phi::DenseTensor>& outputs,
Fn fn,
CommType op_type,
bool sync_op,
bool sync_op UNUSED,
bool use_calc_stream) {
const auto places = GetPlaceList(inputs);
const auto key = GetKeyFromPlaces(places);
6 changes: 4 additions & 2 deletions paddle/fluid/distributed/ps/service/brpc_ps_server.h
@@ -227,8 +227,10 @@ class DownpourPServerBrpcClosure : public PServerClosure {
PsRequestMessage *request(size_t i) { return &_requests[i]; }
PsResponseMessage *response(size_t i) { return &_responses[i]; }
brpc::Controller *cntl(size_t i) { return _cntls[i].get(); }
int check_response(size_t request_idx, int cmd_id) { return 1; }
int check_save_response(size_t request_idx, int cmd_id) { return 1; }
int check_response(size_t request_idx UNUSED, int cmd_id UNUSED) { return 1; }
int check_save_response(size_t request_idx UNUSED, int cmd_id UNUSED) {
return 1;
}

private:
std::atomic<int32_t> _waiting_num;
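A recurring change in this commit is annotating intentionally ignored parameters of stub overrides with UNUSED, which silences unused-parameter warnings in builds that treat them as errors. A minimal sketch of such a macro, assuming the GCC/Clang unused attribute; Paddle's actual definition lives in its own header and may differ:

#include <cstddef>

// Hypothetical stand-alone definition, for illustration only.
#if defined(__GNUC__) || defined(__clang__)
#define UNUSED __attribute__((unused))
#else
#define UNUSED
#endif

// The stub below intentionally ignores both arguments; the annotation
// documents that and suppresses -Wunused-parameter without changing behavior.
int check_response(size_t request_idx UNUSED, int cmd_id UNUSED) { return 1; }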
20 changes: 10 additions & 10 deletions paddle/fluid/distributed/ps/service/communicator/communicator.h
@@ -287,10 +287,10 @@ class Communicator {
return {};
}
virtual void SaveFLStrategy(
const std::unordered_map<uint32_t, std::string> &fl_strategy) {}
const std::unordered_map<uint32_t, std::string> &fl_strategy UNUSED) {}
virtual void StartCoordinator(
const std::string &self_endpoint,
const std::vector<std::string> &trainer_endpoints) {}
const std::string &self_endpoint UNUSED,
const std::vector<std::string> &trainer_endpoints UNUSED) {}

virtual ~Communicator() {}
virtual void RpcProfilerControl();
@@ -337,13 +337,13 @@ class Communicator {

virtual void BarrierTriggerDecrement() {}

virtual void BarrierTriggerReset(int init_counter) {}
virtual void BarrierTriggerReset(int init_counter UNUSED) {}

virtual void InitEnvs() = 0;

virtual void InitImpl(const RpcCtxMap &send_varname_to_ctx,
const RecvCtxMap &recv_varname_to_ctx,
Scope *recv_scope) {}
virtual void InitImpl(const RpcCtxMap &send_varname_to_ctx UNUSED,
const RecvCtxMap &recv_varname_to_ctx UNUSED,
Scope *recv_scope UNUSED) {}

static Communicator *GetInstance() { return communicator_.get(); }

@@ -682,9 +682,9 @@ class FLCommunicator : public GeoCommunicator {
virtual void InitBrpcClient(const std::string &dist_desc,
const std::vector<std::string> &host_sign_list);

void InitImpl(const RpcCtxMap &send_varname_to_ctx,
const RecvCtxMap &recv_varname_to_ctx,
Scope *recv_scope) {}
void InitImpl(const RpcCtxMap &send_varname_to_ctx UNUSED,
const RecvCtxMap &recv_varname_to_ctx UNUSED,
Scope *recv_scope UNUSED) {}

void StartCoordinatorClient(
const std::vector<std::string> &trainer_endpoints);
4 changes: 2 additions & 2 deletions paddle/fluid/distributed/ps/service/coordinator_client.h
@@ -151,8 +151,8 @@ class CoordinatorService : public PsService {
::google::protobuf::Closure* done);

int32_t SaveFLClientInfo(const CoordinatorReqMessage& request,
CoordinatorResMessage* response,
brpc::Controller* cntl) {
CoordinatorResMessage* response UNUSED,
brpc::Controller* cntl UNUSED) {
_coordinator_service_handle->SaveFLClientInfo(request);
return 0;
}
11 changes: 6 additions & 5 deletions paddle/fluid/distributed/ps/table/common_graph_table.h
@@ -511,7 +511,7 @@ class GraphTable : public Table {
}
virtual ~GraphTable();

virtual void *GetShard(size_t shard_idx) { return 0; }
virtual void *GetShard(size_t shard_idx UNUSED) { return 0; }

static int32_t sparse_local_shard_num(uint32_t shard_num,
uint32_t server_num) {
@@ -624,15 +624,16 @@ class GraphTable : public Table {
Node *find_node(GraphTableType table_type, int idx, uint64_t id);
Node *find_node(GraphTableType table_type, uint64_t id);

virtual int32_t Pull(TableContext &context) { return 0; } // NOLINT
virtual int32_t Push(TableContext &context) { return 0; } // NOLINT
virtual int32_t Pull(TableContext &context UNUSED) { return 0; } // NOLINT
virtual int32_t Push(TableContext &context UNUSED) { return 0; } // NOLINT

virtual int32_t clear_nodes(GraphTableType table_type, int idx);
virtual void Clear() {}
virtual int32_t Flush() { return 0; }
virtual int32_t Shrink(const std::string &param) { return 0; }
virtual int32_t Shrink(const std::string &param UNUSED) { return 0; }
// Specify the save path
virtual int32_t Save(const std::string &path, const std::string &converter) {
virtual int32_t Save(const std::string &path UNUSED,
const std::string &converter UNUSED) {
return 0;
}
virtual int32_t InitializeShard() { return 0; }
2 changes: 1 addition & 1 deletion paddle/fluid/eager/amp_auto_cast.h
@@ -43,7 +43,7 @@ inline std::vector<paddle::Tensor> AmpAutoCasts(
const std::string& inputs_name,
const std::vector<paddle::Tensor>& inputs,
const phi::DataType& dst_dtype,
std::string op_name) {
std::string op_name UNUSED) {
VLOG(6) << "AMP AmpAutoCasts:"
<< " inputs(" << inputs_name << ") dst_dtype("
<< phi::DataTypeToString(dst_dtype) << ").";
6 changes: 4 additions & 2 deletions paddle/fluid/eager/nan_inf_utils.cc
@@ -115,7 +115,9 @@ void CheckTensorHasNanOrInf(const std::string& api_name, const Tensor& tensor) {

void CheckTensorHasNanOrInf(const std::string& api_name,
const paddle::optional<Tensor>& tensor) {
CheckTensorHasNanOrInf(api_name, tensor.get());
if (tensor) {
CheckTensorHasNanOrInf(api_name, *tensor);
}
}

void CheckTensorHasNanOrInf(const std::string& api_name,
@@ -169,7 +171,7 @@ void CheckTensorHasNanOrInf(
const std::string& api_name,
const paddle::optional<std::vector<Tensor>>& tensors) {
if (tensors) {
CheckTensorHasNanOrInf(api_name, tensors.get());
CheckTensorHasNanOrInf(api_name, *tensors);
}
}

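Both hunks above apply the same fix: dereferencing an empty paddle::optional via get() is invalid, so the checks now inspect the value only when one is present. The same guard pattern with std::optional as a stand-in, an assumption for illustration (paddle::optional behaves analogously here):

#include <iostream>
#include <optional>
#include <string>

// Only inspect the value when one is actually present; an absent optional
// is a legitimate "nothing passed" case and is simply skipped.
void CheckHasValue(const std::string& api_name,
                   const std::optional<std::string>& value) {
  if (value) {
    std::cout << api_name << ": checking " << *value << "\n";
  }
}

int main() {
  CheckHasValue("relu", std::string("input tensor"));  // prints
  CheckHasValue("relu", std::nullopt);                 // safely does nothing
  return 0;
}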
4 changes: 1 addition & 3 deletions paddle/fluid/framework/CMakeLists.txt
@@ -928,9 +928,7 @@ if(WITH_DISTRIBUTE)
fleet_executor)
endif()
elseif(WITH_PSLIB)
set(DISTRIBUTE_COMPILE_FLAGS
"-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
)
set(DISTRIBUTE_COMPILE_FLAGS "")
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new")
endif()
5 changes: 0 additions & 5 deletions paddle/fluid/framework/dlpack_tensor.cc
@@ -89,11 +89,6 @@ struct DLDeviceVisitor
platform::errors::Unimplemented("platform::XPUPlace is not supported"));
}

inline ::DLDevice operator()(const platform::NPUPlace &place) const {
PADDLE_THROW(
platform::errors::Unimplemented("platform::NPUPlace is not supported"));
}

inline ::DLDevice operator()(const platform::NPUPinnedPlace &place) const {
PADDLE_THROW(platform::errors::Unimplemented(
"platform::NPUPinnedPlace is not supported"));
14 changes: 4 additions & 10 deletions paddle/fluid/framework/ir/CMakeLists.txt
@@ -104,7 +104,6 @@ pass_library(delete_dropout_op_pass inference)
pass_library(delete_concat_op_pass inference)
pass_library(delete_c_identity_op_pass inference)
pass_library(preln_residual_bias_fuse_pass inference)
pass_library(delete_fill_constant_op_pass inference)
pass_library(constant_folding_pass inference)
pass_library(auto_mixed_precision_pass inference)
pass_library(conv2d_fusion_layout_transfer_pass inference)
@@ -118,7 +117,6 @@ pass_library(fused_multi_transformer_encoder_pass inference)
pass_library(fused_multi_transformer_decoder_pass inference)
pass_library(fuse_multi_transformer_layer_pass inference)
pass_library(adaptive_pool2d_convert_global_pass inference)
pass_library(unsqueeze2_eltwise_fuse_pass inference)
pass_library(yolo_box_fuse_pass inference)
pass_library(layer_norm_fuse_pass inference)
pass_library(add_support_int8_pass inference)
@@ -242,7 +240,7 @@ if(WITH_XPU)
pass_library(one_beam_size_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS})
pass_library(delete_isolated_node_pass inference DIR xpu DEPS
${XPU_PASS_DEPS})
pass_library(fused_multi_transformer_xpu_quant_pass inference DIR xpu DEPS
pass_library(fused_multi_transformer_xpu_pass inference DIR xpu DEPS
${XPU_PASS_DEPS})
pass_library(stack_fuse_pass inference DIR xpu DEPS ${XPU_PASS_DEPS})
pass_library(fused_multi_transformer_cachekv_layout_trans_pass inference DIR
@@ -391,10 +389,6 @@ cc_test(
test_adaptive_pool2d_convert_global_pass
SRCS adaptive_pool2d_convert_global_pass_tester.cc
DEPS adaptive_pool2d_convert_global_pass)
cc_test(
test_unsqueeze2_eltwise_fuse_pass_cc
SRCS unsqueeze2_eltwise_fuse_pass_tester.cc
DEPS unsqueeze2_eltwise_fuse_pass)
cc_test(
test_generate_pass_cc
SRCS generate_pass_tester.cc
@@ -519,9 +513,9 @@ if(WITH_XPU)
SRCS xpu/delete_isolated_node_pass_test.cc
DEPS delete_isolated_node_pass)
cc_test(
test_fused_multi_transformer_xpu_quant_pass
SRCS xpu/fused_multi_transformer_xpu_quant_pass_tester.cc
DEPS fused_multi_transformer_xpu_quant_pass)
test_fused_multi_transformer_xpu_pass
SRCS xpu/fused_multi_transformer_xpu_pass_tester.cc
DEPS fused_multi_transformer_xpu_pass)
cc_test(
test_one_beam_size_fuse_pass
SRCS xpu/one_beam_size_fuse_pass_test.cc
2 changes: 0 additions & 2 deletions paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
@@ -55,8 +55,6 @@ static phi::Backend ConvertPlaceToBackend(const phi::Place& place) {
return phi::Backend::GPU;
case phi::AllocationType::XPU:
return phi::Backend::XPU;
case phi::AllocationType::NPU:
return phi::Backend::NPU;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Cannot convert place(%d).", static_cast<int>(place.GetType())));
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/constant_folding_pass.cc
@@ -64,7 +64,7 @@ void ConstantFoldingPass::ApplyImpl(ir::Graph *graph) const {
platform::errors::Fatal(
"scope must not be null when applying constant floding."));

std::vector<std::string> blacklist{"feed", "matrix_multiply"};
std::vector<std::string> blacklist{"feed", "matrix_multiply", "save"};

auto op_node_sorted = framework::ir::TopologyVarientSort(
*graph, static_cast<framework::ir::SortKind>(0));
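Adding "save" to the blacklist keeps constant folding away from ops whose output may be computable ahead of time but whose side effect (persisting a variable) must still run at execution. A sketch of how such a blacklist is typically consulted while visiting the sorted op nodes; the names below are illustrative, not the pass's actual code:

#include <algorithm>
#include <string>
#include <vector>

// Returns true when op_type must never be folded away.
bool IsBlacklisted(const std::vector<std::string>& blacklist,
                   const std::string& op_type) {
  return std::find(blacklist.begin(), blacklist.end(), op_type) !=
         blacklist.end();
}

// Hypothetical usage inside the visiting loop:
//   if (IsBlacklisted(blacklist, op_node->Op()->Type())) continue;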
(The remaining changed files in this commit are not shown here.)
