diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index 42de34fb52061..640e2e37ad434 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -35,7 +35,7 @@ ELSE () ENDIF() SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev") -SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210701") +SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210729") SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 485fddff4df42..c632a1dc6935a 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -188,8 +188,13 @@ cc_library(op_kernel_type SRCS op_kernel_type.cc DEPS device_context place) cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_vars_inference) +IF(WITH_XPU) +cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto + shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils) +ELSE() cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils) +ENDIF() cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context) cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op_registry device_context) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 20cffaa959019..0f7012940d76b 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -36,7 +36,8 @@ class LoDTensor; } // namespace framework } // namespace paddle #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_op_list.h" #endif #ifdef PADDLE_WITH_MKLDNN @@ -1254,7 +1255,8 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx, #endif #ifdef PADDLE_WITH_XPU if (kernel_iter == kernels.end() && - is_xpu_place(expected_kernel_key.place_)) { + is_xpu_place(expected_kernel_key.place_) && + !paddle::platform::is_xpu_support_op(type_, expected_kernel_key)) { VLOG(3) << "missing XPU kernel: " << type_ << ", expected_kernel_key:" << expected_kernel_key << ", fallbacking to CPU one!"; diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index c9dffe2d76a43..93b18e7e553b5 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -1,6 +1,10 @@ cc_library(imperative_flag SRCS flags.cc DEPS gflags) +IF(WITH_XPU) +cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows var_type_traits op_kernel_type data_transform nan_inf_utils) +ELSE() cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows var_type_traits op_kernel_type data_transform nan_inf_utils) +ENDIF() cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry) add_subdirectory(jit) cc_library(amp SRCS amp_auto_cast.cc DEPS layer ) diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index 57c6ae3cbb0a1..e463ddae383ff 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -17,7 +17,9 @@ #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/details/nan_inf_utils.h" #include "paddle/fluid/imperative/infer_shape_context.h" - +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/xpu/xpu_op_list.h" +#endif DECLARE_bool(check_nan_inf); namespace paddle { @@ -127,7 +129,8 @@ PreparedOp PrepareImpl(const NameVarMap& ins, auto kernel_iter = kernels.find(expected_kernel_key); #ifdef PADDLE_WITH_XPU if (kernel_iter == kernels.end() && - is_xpu_place(expected_kernel_key.place_)) { + is_xpu_place(expected_kernel_key.place_) && + !paddle::platform::is_xpu_support_op(op.Type(), expected_kernel_key)) { VLOG(3) << "missing XPU kernel: " << op.Type() << ", expected_kernel_key:" << expected_kernel_key << ", fallbacking to CPU one!"; diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 8bc9775381be5..bfc4a1d598200 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -33,7 +33,7 @@ #include "paddle/fluid/platform/gpu_info.h" #endif #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #include "paddle/fluid/platform/npu_info.h" diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc index bc72b4b20d061..6c2fb82cb7cbe 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc @@ -31,7 +31,7 @@ #include "paddle/fluid/platform/cuda_device_guard.h" #endif #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" #endif DEFINE_bool(init_allocated_mem, false, diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc index f2f8c5d1fb555..3b3be9776c4c5 100644 --- a/paddle/fluid/memory/memcpy.cc +++ b/paddle/fluid/memory/memcpy.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler.h" #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" #endif namespace paddle { diff --git a/paddle/fluid/operators/activation_op_xpu.cc b/paddle/fluid/operators/activation_op_xpu.cc index 2c7219ef6885b..257a91d7c15d7 100644 --- a/paddle/fluid/operators/activation_op_xpu.cc +++ b/paddle/fluid/operators/activation_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/operators/activation_op.h" #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/concat_op_xpu.cc b/paddle/fluid/operators/concat_op_xpu.cc index be299babdba7a..dc9359ecf5c3d 100644 --- a/paddle/fluid/operators/concat_op_xpu.cc +++ b/paddle/fluid/operators/concat_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include #include #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/deformable_conv_op_xpu.cc b/paddle/fluid/operators/deformable_conv_op_xpu.cc index 18bab83b0edb8..457616756215c 100644 --- a/paddle/fluid/operators/deformable_conv_op_xpu.cc +++ b/paddle/fluid/operators/deformable_conv_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/dropout_op_xpu.cc b/paddle/fluid/operators/dropout_op_xpu.cc index 79d239074845a..0b0b7095bd5d1 100644 --- a/paddle/fluid/operators/dropout_op_xpu.cc +++ b/paddle/fluid/operators/dropout_op_xpu.cc @@ -11,7 +11,7 @@ limitations under the License. */ #include "paddle/fluid/operators/dropout_op.h" #include #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc index d73e46df3491b..cb75616221bc4 100644 --- a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc +++ b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc @@ -15,7 +15,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU #include "paddle/fluid/operators/metrics/accuracy_op.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc index 9cc8ac200b8ee..4f98dde210f7a 100644 --- a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/operators/reduce_ops/logsumexp_op.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc index a4ed0c85f4f9d..ae27a5d7df473 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc @@ -16,7 +16,7 @@ #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h index fa9503ec3f0ae..5ae60713bc912 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h @@ -21,7 +21,7 @@ #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc index bf55221bd3ffd..f759b104d01d1 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc @@ -16,7 +16,7 @@ #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/rnn_op_xpu.cc b/paddle/fluid/operators/rnn_op_xpu.cc index fb82d18e62f3b..9d637e1cee117 100644 --- a/paddle/fluid/operators/rnn_op_xpu.cc +++ b/paddle/fluid/operators/rnn_op_xpu.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/scale_op_xpu.cc b/paddle/fluid/operators/scale_op_xpu.cc index fdb90797b69db..e0dfad91570ad 100644 --- a/paddle/fluid/operators/scale_op_xpu.cc +++ b/paddle/fluid/operators/scale_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/operators/scale_op.h" #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sign_op_xpu.cc b/paddle/fluid/operators/sign_op_xpu.cc index 86fe826c659ef..a164a9b056677 100644 --- a/paddle/fluid/operators/sign_op_xpu.cc +++ b/paddle/fluid/operators/sign_op_xpu.cc @@ -15,7 +15,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU #include "paddle/fluid/operators/sign_op.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sum_op_xpu.cc b/paddle/fluid/operators/sum_op_xpu.cc index 264cc4e2cf794..d16bb5562ed3a 100644 --- a/paddle/fluid/operators/sum_op_xpu.cc +++ b/paddle/fluid/operators/sum_op_xpu.cc @@ -13,7 +13,7 @@ limitations under the License. */ #include "paddle/fluid/operators/sum_op.h" #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/transpose_op_xpu.cc b/paddle/fluid/operators/transpose_op_xpu.cc index 2748c07f9e6d7..360c2125ed1f6 100644 --- a/paddle/fluid/operators/transpose_op_xpu.cc +++ b/paddle/fluid/operators/transpose_op_xpu.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 36a956762174e..efd25bc892940 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -69,7 +69,8 @@ cc_library(place SRCS place.cc DEPS enforce boost) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) if(WITH_XPU) -cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib) +cc_library(xpu_info SRCS xpu/xpu_info.cc DEPS gflags glog enforce xpulib) +cc_library(xpu_op_list SRCS xpu/xpu_op_list.cc DEPS gflags glog enforce xpulib) endif() if(WITH_ASCEND) diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 82f14c612d1fa..c7162f58de284 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -196,7 +196,10 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const { Place CPUDeviceContext::GetPlace() const { return place_; } #ifdef PADDLE_WITH_XPU -XPUDeviceContext::XPUDeviceContext() { context_ = xpu::create_context(); } +XPUDeviceContext::XPUDeviceContext() { + context_ = xpu::create_context(); + xpu_version_ = get_xpu_version(place_.device); +} XPUDeviceContext::~XPUDeviceContext() {} diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 68589f546dc78..abac12ff26648 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -68,8 +68,8 @@ struct GpuDevice; } // namespace Eigen #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #ifdef PADDLE_WITH_ASCEND_CL @@ -137,12 +137,14 @@ struct DefaultDeviceContextType { }; #ifdef PADDLE_WITH_XPU +namespace xpu = baidu::xpu::api; class XPUDeviceContext : public DeviceContext { public: XPUDeviceContext(); explicit XPUDeviceContext(XPUPlace place); virtual ~XPUDeviceContext(); Eigen::DefaultDevice* eigen_device() const { return nullptr; } + XPUVersion xpu_version() const { return xpu_version_; } Place GetPlace() const override; xpu::Context* x_context() const; @@ -159,6 +161,7 @@ class XPUDeviceContext : public DeviceContext { private: XPUPlace place_; + XPUVersion xpu_version_; xpu::Context* context_; #ifdef PADDLE_WITH_XPU_BKCL BKCLContext_t bkcl_context_; diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index ac6988d350f4f..2e0ba9d241c72 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -29,8 +29,8 @@ limitations under the License. */ #include "paddle/fluid/platform/place.h" #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #ifdef WITH_WIN_DUMP_DBG diff --git a/paddle/fluid/platform/xpu/xpu1_op_list.h b/paddle/fluid/platform/xpu/xpu1_op_list.h new file mode 100644 index 0000000000000..131525718cac7 --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu1_op_list.h @@ -0,0 +1,230 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#ifdef PADDLE_WITH_XPU +#include +#include +#include + +#include "paddle/fluid/framework/op_kernel_type.h" + +namespace paddle { +namespace platform { + +using vartype = paddle::framework::proto::VarType; +using pOpKernelType = paddle::framework::OpKernelType; +using XPUKernelSet = + std::unordered_set; +using XPUOpMap = std::unordered_map; + +XPUOpMap& get_kl1_ops() { + // KL1支持的op,通过op_name, data_type, place来索引 + static XPUOpMap s_xpu1_kernels{ + {"relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"relu_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"tanh", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"tanh_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sigmoid", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sigmoid_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gelu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gelu_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sqrt", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sqrt_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"square", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"square_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"hard_switch", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"hard_switch_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"leaky_relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"leaky_relu_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"log", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"pow", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"abs", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"affine_channel", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"affine_channel_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"assign", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"batch_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"batch_norm_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"cast", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"clip_by_norm", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"coalesce_tensor", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"c_reduce_sum", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"c_allreduce_sum", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"broadcast", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"concat", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"concat_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"logicalor", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"logicaland", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"logicalnot", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"depthwise_conv2d", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"depthwise_conv2d_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"conv2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"conv2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"deformable_conv", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"deformable_conv_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"dropout", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"dropout_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_sub", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_sub_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_add", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_add_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_div", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_div_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_pow", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_floordiv", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_mul", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_mul_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_max", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_max_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_min", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_min_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"fill_constant", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gather", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gather_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gaussian_random", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"bilinear_interp", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"bilinear_interp_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"nearest_interp", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"nearest_interp_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"bilinear_interp_v2", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"bilinear_interp_v2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"nearest_interp_v2", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"nearest_interp_v2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"layer_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"layer_norm_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"load", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"log_loss", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"log_loss_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"lookup_table_v2", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"lookup_table_v2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"matmul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"matmul_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"matmul_v2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"matmul_v2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mean_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"accuracy", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mul_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"one_hot", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"one_hot_v2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sgd", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"adam", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"rmsprop", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"lamb", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"pool2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"pool2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"range", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_sum_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"logsumexp", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_max", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_max_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reshape2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reshape2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"rnn", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"rnn_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"roi_align", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"roi_align_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"scale", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"shape", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sign", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"slice", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"slice_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"softmax", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"softmax_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"softmax_with_cross_entropy", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"squeeze", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"squeeze_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"squeeze2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"squeeze2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"stack", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"top_k", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"transpose", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"transpose_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"transpose2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"transpose2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"truncated_gaussian_random", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"uniform_random", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"unsqueeze", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"unsqueeze_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"unsqueeze2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"unsqueeze2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"momuntem", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})} + // AddMore + }; + + return s_xpu1_kernels; +} + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/platform/xpu/xpu2_op_list.h b/paddle/fluid/platform/xpu/xpu2_op_list.h new file mode 100644 index 0000000000000..fc80e5ee962f9 --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu2_op_list.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#ifdef PADDLE_WITH_XPU +#include +#include +#include + +#include "paddle/fluid/framework/op_kernel_type.h" + +namespace paddle { +namespace platform { + +using vartype = paddle::framework::proto::VarType; +using pOpKernelType = paddle::framework::OpKernelType; +using XPUKernelSet = + std::unordered_set; +using XPUOpMap = std::unordered_map; + +XPUOpMap& get_kl2_ops() { + // KL1支持的op,通过op_name, data_type, place来索引 + static XPUOpMap s_xpu2_kernels{ + {"mul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace())})}, + // AddMore + }; + + return s_xpu2_kernels; +} + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/platform/xpu_header.h b/paddle/fluid/platform/xpu/xpu_header.h similarity index 95% rename from paddle/fluid/platform/xpu_header.h rename to paddle/fluid/platform/xpu/xpu_header.h index 99f4224b5d408..caee41ae299c7 100644 --- a/paddle/fluid/platform/xpu_header.h +++ b/paddle/fluid/platform/xpu/xpu_header.h @@ -21,12 +21,9 @@ #include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/float16.h" -#include "xpu/api.h" -#include "xpu/refactor/fusion.h" -#include "xpu/refactor/math.h" -#include "xpu/refactor/nn.h" #include "xpu/runtime.h" #include "xpu/runtime_ex.h" +#include "xpu/xdnn.h" namespace xpu = baidu::xpu::api; diff --git a/paddle/fluid/platform/xpu_info.cc b/paddle/fluid/platform/xpu/xpu_info.cc similarity index 86% rename from paddle/fluid/platform/xpu_info.cc rename to paddle/fluid/platform/xpu/xpu_info.cc index f88248fda7e65..6b8ab16b47d68 100644 --- a/paddle/fluid/platform/xpu_info.cc +++ b/paddle/fluid/platform/xpu/xpu_info.cc @@ -8,14 +8,14 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #include #include #include #include "gflags/gflags.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" #include "paddle/fluid/string/split.h" DEFINE_string(selected_xpus, "", @@ -103,5 +103,21 @@ void SetXPUDeviceId(int id) { ret)); } +XPUVersion get_xpu_version(int dev_id) { + uint64_t v = 0; + int ret = xpu_device_get_attr(&v, XPUATTR_MODEL, dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "xpu_device_get_attr return wrong value[%d]", ret)); + + if (v == K100 || v == K200) { + VLOG(1) << "KUNLUN device " << dev_id << " is XPU1\n"; + return XPU1; + } else { + VLOG(1) << "KUNLUN device " << dev_id << " is XPU2\n"; + return XPU2; + } +} + } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/xpu_info.h b/paddle/fluid/platform/xpu/xpu_info.h similarity index 95% rename from paddle/fluid/platform/xpu_info.h rename to paddle/fluid/platform/xpu/xpu_info.h index 2bf7b0b5cb647..3cb79d51eb7bb 100644 --- a/paddle/fluid/platform/xpu_info.h +++ b/paddle/fluid/platform/xpu/xpu_info.h @@ -51,6 +51,9 @@ class XPUDeviceGuard { int prev_id_{-1}; }; +enum XPUVersion { XPU1, XPU2 }; +XPUVersion get_xpu_version(int dev_id); + } // namespace platform } // namespace paddle #endif diff --git a/paddle/fluid/platform/xpu/xpu_op_list.cc b/paddle/fluid/platform/xpu/xpu_op_list.cc new file mode 100644 index 0000000000000..b3349407942bd --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu_op_list.cc @@ -0,0 +1,39 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#ifdef PADDLE_WITH_XPU +#include + +#include "paddle/fluid/platform/xpu/xpu1_op_list.h" +#include "paddle/fluid/platform/xpu/xpu2_op_list.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_op_list.h" + +namespace paddle { +namespace platform { + +bool is_xpu_support_op(std::string op_name, const pOpKernelType& type) { + auto& ops = get_kl1_ops(); + auto v = + get_xpu_version(BOOST_GET_CONST(platform::XPUPlace, type.place_).device); + if (v == XPU2) { + ops = get_kl2_ops(); + } + + if (ops.find(op_name) != ops.end() && + ops[op_name].find(type) != ops[op_name].end()) { + return true; + } + return false; +} + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/platform/xpu/xpu_op_list.h b/paddle/fluid/platform/xpu/xpu_op_list.h new file mode 100644 index 0000000000000..487bc8ac48b66 --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu_op_list.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#ifdef PADDLE_WITH_XPU +#include + +#include "paddle/fluid/framework/op_kernel_type.h" + +namespace paddle { +namespace platform { + +using pOpKernelType = paddle::framework::OpKernelType; + +bool is_xpu_support_op(std::string op_name, const pOpKernelType& type); + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index d646e06d8a47a..fd0b352ec2efa 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -117,7 +117,7 @@ limitations under the License. */ #endif #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #ifdef PADDLE_WITH_CRYPTO