Skip to content

Commit

Permalink
add cusparseLt 0.4 to speed up ffn1,ffn2,qkvo multiplication,speed up…
Browse files Browse the repository at this point in the history
… 22% (#107)

Co-authored-by: yangjunchao <yangjunchao@baidu.com>
  • Loading branch information
chao9527 and yangjunchao authored Jan 9, 2024
1 parent 4b57358 commit 80933a8
Show file tree
Hide file tree
Showing 30 changed files with 2,255 additions and 461 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ option(WITH_ASCEND_CL "Compile PaddlePaddle with ASCEND CL" ${WITH_ASCEND})
option(WITH_ASCEND_CXX11 "Compile PaddlePaddle with ASCEND and CXX11 ABI" OFF)
option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME" OFF)
option(WITH_CUSPARSELT "Compile PaddlePaddle with CUSPARSELT" OFF)
option(WITH_CUSPARSELT4 "Compile PaddlePaddle with CUSPARSELT version 0.4" ON)
# Note(zhouwei): It uses the options above, so it is placed here
include(init)
include(generic) # simplify cmake module
Expand Down
61 changes: 61 additions & 0 deletions cmake/external/cusparselt4.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Nothing to do unless the cuSPARSELt 0.4 integration was requested.
if(NOT WITH_CUSPARSELT4)
  return()
endif()

# Reject Windows and ARM configurations: report a configure error and skip
# the rest of this module.
if(WIN32 OR WITH_ARM)
  message(SEND_ERROR "The current sparselt support linux only")
  return()
endif()

include(ExternalProject)

# Name of the external project target that downloads the archive.
set(CUSPARSELT_PROJECT "extern_cusparselt")

# Download location: base URL, archive file name, and the full URL built from
# both. FORCE overwrites any value already stored in the cache under this name.
set(CUSPARSELT_P "https://developer.download.nvidia.com/compute")
set(CUSPARSELT_F "libcusparse_lt-linux-x86_64-0.4.0.7-archive.tar.xz")
set(CUSPARSELT_URL
    "${CUSPARSELT_P}/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_F}"
    CACHE STRING "" FORCE)

# Working (prefix) directory and install directory under THIRD_PARTY_PATH.
set(CUSPARSELT_PREFIX_DIR ${THIRD_PARTY_PATH}/cusparselt)
set(CUSPARSELT_INSTALL_DIR ${THIRD_PARTY_PATH}/install/cusparselt)

# Cached locations of the installed libraries and headers.
set(CUSPARSELT_LIB_DIR
    "${CUSPARSELT_INSTALL_DIR}/lib"
    CACHE PATH "sparselt lib directory." FORCE)
set(CUSPARSELT_INC_DIR
    "${CUSPARSELT_INSTALL_DIR}/include"
    CACHE PATH "sparselt include directory." FORCE)

# Ask Makefile generators not to delete custom-command outputs of this
# directory on "make clean".
set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1)

# Make the installed headers visible to targets in this directory scope.
include_directories(${CUSPARSELT_INC_DIR})

# Fail early with a clear message if a target with this name already exists.
# ExternalProject_Add would otherwise stop with a generic duplicate-target
# error that does not point at the conflicting option.
if(TARGET ${CUSPARSELT_PROJECT})
  message(
    FATAL_ERROR
      "Target '${CUSPARSELT_PROJECT}' is already defined (is WITH_CUSPARSELT also enabled?); it cannot be combined with WITH_CUSPARSELT4."
  )
endif()

# Download the prebuilt archive from CUSPARSELT_URL and copy its lib/ and
# include/ directories into the install tree. There is no configure, build or
# update step. The two copy operations are expressed as separate commands of
# the install step (INSTALL_COMMAND ... COMMAND ...) instead of being joined
# with a shell "&&" token, so they do not depend on shell-operator handling.
# NOTE(review): no URL_HASH is given, so the downloaded archive is not
# integrity-checked -- consider pinning a checksum.
ExternalProject_Add(
  ${CUSPARSELT_PROJECT}
  ${EXTERNAL_PROJECT_LOG_ARGS}
  URL ${CUSPARSELT_URL}
  PREFIX ${CUSPARSELT_PREFIX_DIR}
  DOWNLOAD_NO_PROGRESS 1
  CONFIGURE_COMMAND ""
  BUILD_COMMAND ""
  INSTALL_COMMAND
    ${CMAKE_COMMAND} -E copy_directory
    ${CUSPARSELT_PREFIX_DIR}/src/${CUSPARSELT_PROJECT}/lib ${CUSPARSELT_LIB_DIR}
  COMMAND
    ${CMAKE_COMMAND} -E copy_directory
    ${CUSPARSELT_PREFIX_DIR}/src/${CUSPARSELT_PROJECT}/include
    ${CUSPARSELT_INC_DIR}
  UPDATE_COMMAND "")

# Source-less INTERFACE target named "cusparselt". add_dependencies only adds
# a build-order dependency on the external project target; it does not attach
# include directories or link libraries to this target.
add_library(cusparselt INTERFACE)
add_dependencies(cusparselt ${CUSPARSELT_PROJECT})
# Record in the including scope that this module ran to completion.
set(CUSPARSELT4_FOUND ON)
# Directory-scoped preprocessor definition PADDLE_WITH_CUSPARSELT4.
add_definitions(-DPADDLE_WITH_CUSPARSELT4)
2 changes: 1 addition & 1 deletion cmake/inference_lib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ function(copy_part_of_thrid_party TARGET DST)
DSTS ${dst_dir} ${dst_dir})
endif()

if(WITH_SPARSELT)
if(WITH_SPARSELT OR WITH_SPARSELT4)
set(dst_dir "${DST}/third_party/install/cusparselt")
copy(
${TARGET}
Expand Down
5 changes: 5 additions & 0 deletions cmake/third_party.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,11 @@ if(WITH_CUSPARSELT)
list(APPEND third_party_deps extern_cusparselt)
endif()

if(WITH_CUSPARSELT4)
include(external/cusparselt4) # download, build, install cusparselt 0.4
list(APPEND third_party_deps extern_cusparselt)
endif()

if(WITH_GPU
AND NOT WITH_ARM
AND NOT WIN32
Expand Down
7 changes: 7 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include "paddle/fluid/inference/utils/model_utils.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/fused/cusparseLt.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device_context.h"
Expand Down Expand Up @@ -435,6 +436,8 @@ void AnalysisPredictor::InitDeviceContexts() {
gpu_context->SetSolverHandle(
gpu_resource->GetSolverDnHandleCreator());
gpu_context->SetSparseHandle(gpu_resource->GetSparseHandleCreator());
gpu_context->SetSparseLtHandle(
gpu_resource->GetSparseLtHandleCreator());
gpu_context->SetEigenDevice(gpu_resource->GetGpuEigenDeviceCreator());
gpu_context->SetComputeCapability(
gpu_resource->GetGpuComputeCapability());
Expand All @@ -447,6 +450,10 @@ void AnalysisPredictor::InitDeviceContexts() {
gpu_resource->GetGPUMultiProcessors());
gpu_context->SetDriverVersion(gpu_resource->GetGpuDriverVersion());
gpu_context->SetRuntimeVersion(gpu_resource->GetGpuRuntimeVersion());
paddle::operators::WeightCache::Instance().init(
place_,
phi::Stream(
reinterpret_cast<phi::StreamId>(gpu_resource->GetStream())));
VLOG(1) << "thread id is " << std::this_thread::get_id()
<< ", stream id is "
<< reinterpret_cast<void *>(gpu_resource->GetStream())
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/infer_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class InferGPUContext : public phi::GPUContext {
using phi::GPUContext::SetEigenDevice;
using phi::GPUContext::SetSolverHandle;
using phi::GPUContext::SetSparseHandle;
using phi::GPUContext::SetSparseLtHandle;
using phi::GPUContext::SetStream;
// using phi::GPUContext::SetDnnWorkspaceHandle;
using phi::GPUContext::SetComputeCapability;
Expand Down
23 changes: 23 additions & 0 deletions paddle/fluid/inference/api/resource_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ void GPUContextResource::DestroyGPUResource() {
DestroyBlasLtHandle();
DestroySolverHandle();
DestroySparseHandle();
DestroySparseLtHandle();
}

void GPUContextResource::InitGpuProperties() {
Expand Down Expand Up @@ -228,6 +229,20 @@ void GPUContextResource::DestroySparseHandle() {
phi::DestroySparseHandle(sparse_handle_);
}

// Sets up the cuSPARSELt handle on first use. When no handle pointer is set
// yet, it is pointed at the member storage `sparselt_ori_handle_` and passed
// to phi::InitSparseLtHandle. Later calls are no-ops while the pointer stays
// non-null.
void GPUContextResource::InitSparseLtHandle() {
  if (sparselt_handle_ != nullptr) {
    return;  // Already set up.
  }
  sparselt_handle_ = &sparselt_ori_handle_;
  phi::InitSparseLtHandle(sparselt_handle_);
}

// Releases the cuSPARSELt handle if one is set: hands the pointer to
// phi::DestroySparseLtHandle and then resets it to nullptr, so a repeated
// call does nothing.
void GPUContextResource::DestroySparseLtHandle() {
  if (sparselt_handle_ == nullptr) {
    return;  // Nothing to release.
  }
  phi::DestroySparseLtHandle(sparselt_handle_);
  sparselt_handle_ = nullptr;
}

phi::Place GPUContextResource::Place() const { return place_; }

gpuStream_t GPUContextResource::GetStream() const { return stream_; }
Expand Down Expand Up @@ -322,6 +337,14 @@ GPUContextResource::GetSparseHandleCreator() {
};
}

std::function<cusparseLtHandle_t*()>
GPUContextResource::GetSparseLtHandleCreator() {
return [&]() {
InitSparseLtHandle();
return sparselt_handle_;
};
}

Eigen::GpuDevice* GPUContextResource::GetGpuEigenDevice() const {
return gpu_eigen_device_.get();
}
Expand Down
5 changes: 5 additions & 0 deletions paddle/fluid/inference/api/resource_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class GPUContextResource {
std::function<phi::blasLtHandle_t()> GetBlasLtHandleCreator();
std::function<phi::solverHandle_t()> GetSolverDnHandleCreator();
std::function<phi::sparseHandle_t()> GetSparseHandleCreator();
std::function<cusparseLtHandle_t*()> GetSparseLtHandleCreator();
std::function<Eigen::GpuDevice*()> GetGpuEigenDeviceCreator();

gpuStream_t GetStream() const;
Expand Down Expand Up @@ -106,6 +107,8 @@ class GPUContextResource {
void DestroySolverHandle();
void InitSparseHandle();
void DestroySparseHandle();
void InitSparseLtHandle();
void DestroySparseLtHandle();

private:
phi::Place place_;
Expand All @@ -130,6 +133,8 @@ class GPUContextResource {
dnnHandle_t dnn_handle_{nullptr};
phi::solverHandle_t solver_handle_{nullptr};
phi::sparseHandle_t sparse_handle_{nullptr};
cusparseLtHandle_t* sparselt_handle_{nullptr};
cusparseLtHandle_t sparselt_ori_handle_;
// DnnWorkspaceHandle
};
#endif
Expand Down
Loading

0 comments on commit 80933a8

Please sign in to comment.