diff --git a/cmake/external/lite.cmake b/cmake/external/lite.cmake index 0cc40ddfefcdd..8ae72e0aa78fa 100644 --- a/cmake/external/lite.cmake +++ b/cmake/external/lite.cmake @@ -50,7 +50,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) set(LITE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lite) if(NOT LITE_GIT_TAG) - set(LITE_GIT_TAG 4ab64daecc11fbf74fffdc6a4733f388472e7d5d) + set(LITE_GIT_TAG 62fc737d4a553bca738f96b0402b28f26a8d2d4f) endif() if(NOT CUDA_ARCH_NAME) @@ -197,9 +197,9 @@ set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_ if (LITE_WITH_NNADAPTER) set(LITE_NNADAPTER_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so) if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU) - external_lite_libs(lite_nnadapter ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so) + external_lite_libs(lite_nnadapter ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so) set(LITE_DEPS lite_full_static lite_nnadapter) - set(LITE_NNADAPTER_NPU_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so) + set(LITE_NNADAPTER_NPU_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so) endif() else() set(LITE_DEPS lite_full_static) diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc b/paddle/fluid/framework/ir/fc_fuse_pass.cc index 4510aea925e78..bb78cdab67752 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc @@ -51,7 +51,12 @@ FCFusePass::FCFusePass() { .IsTensor() .End() .AddAttr("axis") - .IsNumGE(1) + .IsNumMatch([](int axis) -> bool { + if (axis == -1 || axis >= 1) { + return true; + } + return false; + }) .End(); AddOpCompat(OpCompat("relu")) diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index cda6dc31126d9..ad96a4e3437be 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -238,6 +238,7 @@ struct Argument { DECL_ARGUMENT_FIELD(xpu_autotune_file, XpuAutotuneFile, std::string); DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string); DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool); + DECL_ARGUMENT_FIELD(xpu_device_id, XpuDeviceId, int); DECL_ARGUMENT_FIELD(use_nnadapter, UseNNAdapter, bool); DECL_ARGUMENT_FIELD(nnadapter_model_cache_dir, NNAdapterModelCacheDir, diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 4fdd963b6abff..56bf91fb624af 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -202,6 +202,7 @@ void IRPassManager::CreatePasses(Argument *argument, new std::string(argument->xpu_autotune_file())); pass->Set("precision", new std::string(argument->xpu_precision())); pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen())); + pass->Set("xpu_device_id", new int(argument->xpu_device_id())); // NNAdapter Related pass->Set("use_nnadapter", new bool(argument->use_nnadapter())); pass->Set("nnadapter_model_cache_dir", diff --git a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc index c04342f837e3f..6c38809b43215 100644 --- a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc @@ -243,6 +243,7 @@ void LiteSubgraphPass::SetUpEngine( bool use_gpu = Get("use_gpu"); bool enable_int8 = Get("enable_int8"); bool use_xpu = Get("use_xpu"); + int xpu_device_id = Get("xpu_device_id"); int xpu_l3_workspace_size = Get("xpu_l3_workspace_size"); int cpu_math_library_num_threads = Get("cpu_math_library_num_threads"); bool locked = Get("locked"); @@ -305,6 +306,7 @@ void LiteSubgraphPass::SetUpEngine( }; config.cpu_math_library_num_threads = cpu_math_library_num_threads; config.xpu_l3_workspace_size = xpu_l3_workspace_size; + config.device_id = xpu_device_id; config.locked = locked; config.autotune = autotune; config.autotune_file = autotune_file; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index a6785a7768167..c831b34648276 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -619,6 +619,7 @@ void AnalysisPredictor::PrepareArgument() { argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_); argument_.SetXpuPrecision(config_.xpu_precision_); argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_); + argument_.SetXpuDeviceId(config_.xpu_device_id_); // NNAdapter related argument_.SetUseNNAdapter(config_.NNAdapter().use_nnadapter); argument_.SetNNAdapterDeviceNames( diff --git a/paddle/fluid/inference/lite/engine.cc b/paddle/fluid/inference/lite/engine.cc index 47b9d681b4754..cd78cfecd8635 100644 --- a/paddle/fluid/inference/lite/engine.cc +++ b/paddle/fluid/inference/lite/engine.cc @@ -67,6 +67,7 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create( lite_cxx_config.set_xpu_conv_autotune(cfg.autotune, cfg.autotune_file); lite_cxx_config.set_xpu_multi_encoder_method(cfg.precision, cfg.adaptive_seqlen); + lite_cxx_config.set_xpu_dev_per_thread(cfg.device_id); #endif #ifdef LITE_SUBGRAPH_WITH_NPU diff --git a/paddle/fluid/inference/lite/engine.h b/paddle/fluid/inference/lite/engine.h index 48072656cb996..adeaca7c1c3b7 100644 --- a/paddle/fluid/inference/lite/engine.h +++ b/paddle/fluid/inference/lite/engine.h @@ -39,6 +39,9 @@ struct EngineConfig { std::vector neglected_passes; lite_api::LiteModelType model_type{lite_api::LiteModelType::kProtobuf}; bool model_from_memory{true}; + // TODO(wilber): now only works for xpu, lite gpu can support device_id or + // not? + int device_id = 0; // for xpu size_t xpu_l3_workspace_size; diff --git a/paddle/fluid/inference/utils/io_utils.cc b/paddle/fluid/inference/utils/io_utils.cc index 3691285ba3a51..87331e1978f95 100644 --- a/paddle/fluid/inference/utils/io_utils.cc +++ b/paddle/fluid/inference/utils/io_utils.cc @@ -197,6 +197,9 @@ void SerializeShapeRangeInfo( void DeserializeShapeRangeInfo( const std::string &path, paddle::inference::proto::ShapeRangeInfos *info) { int fd = open(path.c_str(), O_RDONLY); + if (fd == -1) { + PADDLE_THROW(platform::errors::NotFound("File [%s] is not found.", path)); + } google::protobuf::io::FileInputStream *is = new google::protobuf::io::FileInputStream(fd); google::protobuf::TextFormat::Parse(is, info); diff --git a/paddle/fluid/inference/utils/io_utils_tester.cc b/paddle/fluid/inference/utils/io_utils_tester.cc index 766afed4e5014..f85f903b55c21 100644 --- a/paddle/fluid/inference/utils/io_utils_tester.cc +++ b/paddle/fluid/inference/utils/io_utils_tester.cc @@ -118,4 +118,7 @@ TEST(shape_info_io, read_and_write) { std::vector names{"test1"}; paddle::inference::UpdateShapeRangeInfo(path, min_shape, max_shape, opt_shape, names); + ASSERT_THROW(paddle::inference::DeserializeShapeRangeInfo( + "no_exists_file", &min_shape, &max_shape, &opt_shape); + , paddle::platform::EnforceNotMet); } diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 7a0a74d6101a5..0c2580929081d 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -223,7 +223,7 @@ function cmake_base() { -DWITH_GLOO=${gloo_flag} -DWITH_LITE=${WITH_LITE:-OFF} -DWITH_XPU=${WITH_XPU:-OFF} - -DLITE_GIT_TAG=_release/v2.10 + -DLITE_GIT_TAG=release/v2.10 -DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF} -DWITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF} -DWITH_ARM=${WITH_ARM:-OFF} @@ -266,7 +266,7 @@ EOF -DWITH_PSCORE=${distibuted_flag} \ -DWITH_PSLIB=${WITH_PSLIB:-OFF} \ -DWITH_GLOO=${gloo_flag} \ - -DLITE_GIT_TAG=_release/v2.10 \ + -DLITE_GIT_TAG=release/v2.10 \ -DWITH_XPU=${WITH_XPU:-OFF} \ -DXPU_SDK_ROOT=${XPU_SDK_ROOT:-""} \ -DWITH_LITE=${WITH_LITE:-OFF} \ diff --git a/tools/check_api_approvals.sh b/tools/check_api_approvals.sh index 5943e997bdfa2..760bc2b168475 100644 --- a/tools/check_api_approvals.sh +++ b/tools/check_api_approvals.sh @@ -41,13 +41,13 @@ function add_failed(){ api_params_diff=`python ${PADDLE_ROOT}/tools/check_api_compatible.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.spec ${PADDLE_ROOT}/paddle/fluid/API_PR.spec` api_spec_diff=`python ${PADDLE_ROOT}/tools/diff_api.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.spec.api ${PADDLE_ROOT}/paddle/fluid/API_PR.spec.api` if [ "$api_spec_diff" != "" -o "${api_params_diff}" != "" ]; then - echo_line="You must have one RD (XiaoguangHu01 or lanxianghit) approval for API change.\n" + echo_line="You must have one RD (XiaoguangHu01, lanxianghit or Superjomn) approval for API change.\n" echo_line="${echo_line} and one TPM approval for API change: \n" echo_line="${echo_line} jzhang533/ZhangJun, dingjiaweiww/DingJiaWei, Heeenrrry/LiKunLun, TCChenlong/ChenLong for general APIs\n" echo_line="${echo_line} PangHua/XiangHui for distributed related APIs\n" echo_line="${echo_line} twismon/WangYunKai, CheQiXiao/CheQiXiao for inference related APIs.\n" - check_approval 1 46782768 47554610 + check_approval 1 46782768 47554610 328693 check_approval 1 29231 23093488 28379894 11935832 2682285 12050047 50894398 fi