[MTAI-484] feat(ci): download 3rd_party form oss (PaddlePaddle#59)

* [MTAI-484] feat(ci): download 3rd_party form oss * [MT-484] fix(ci): update eigen3 commit * [MTAI-484] use eigen3 patches to replace extra third_party
hanhaowen-mt · Aug 22, 2023 · 152b9e5 · 152b9e5
1 parent 9c96836
commit 152b9e5
Show file tree

Hide file tree

Showing 14 changed files with 323 additions and 7 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -52,8 +52,7 @@
 	ignore = dirty
 [submodule "third_party/eigen3"]
 	path = third_party/eigen3
-	url = https://gitlab.com/paipinuo233/eigen.git
-	branch = support_musa
+	url = https://gitlab.com/libeigen/eigen.git
 	ignore = dirty
 [submodule "third_party/snappy"]
 	path = third_party/snappy

diff --git a/Jenkinsfile b/Jenkinsfile
@@ -54,6 +54,7 @@ pipeline {
             sh """#!/bin/bash
             cd ${env.paddle_musa_working_dir}
             git config --global --add safe.directory "*"
+            pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
             /opt/conda/condabin/conda run -n py38 --no-capture-output BRANCH=origin/develop /bin/bash tools/codestyle/pre_commit.sh
             #git diff --name-only origin/develop..HEAD | xargs pre-commit run --files
             """

diff --git a/ci/build.sh b/ci/build.sh
@@ -49,9 +49,9 @@ while true;do
 done
 
 pushd ${PADDLE_MUSA_DIR}
-# prepare submodules by copying from the local repo,
-# in this case, CI docker need to be updated once submodules' version changed
-cp -r ${PADDLE_MUSA_REPO_PATH}/third_party/. third_party
+. $CUR_DIR/download_third_party.sh
+# The thiry_party extracted from the compressed package is not a git repository,
+# but it is required to be a git repository to patch when compiling.
 cp -r ${PADDLE_MUSA_REPO_PATH}/.git/modules .git
 
 export INFERENCE_DEMO_INSTALL_DIR="/home/data/paddle_musa/.cache/build"

diff --git a/ci/download_third_party.sh b/ci/download_third_party.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+DATE=$(date +%Y%m%d)
+thirdy_party_path=./third_party_${DATE}
+
+third_party_link=https://oss.mthreads.com/mt-ai-data/paddle_musa/third_party.tar.gz
+wget --no-check-certificate ${third_party_link} -P ${thirdy_party_path}
+tar -zxf ${thirdy_party_path}/third_party.tar.gz
+rm -rf ${thirdy_party_path}
diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake
@@ -17,7 +17,7 @@ include(ExternalProject)
 # update eigen to the commit id f612df27 on 03/16/2021
 set(EIGEN_PREFIX_DIR ${THIRD_PARTY_PATH}/eigen3)
 set(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3/src/extern_eigen3)
-set(EIGEN_TAG 6ad1f10acbc311dd82b20cce7f5c305ae8c3eaa9)
+set(EIGEN_TAG f612df273689a19d25b45ca4f8269463207c4fee)
 set(SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/eigen3)
 
 if(WIN32)
@@ -60,6 +60,64 @@ if(CMAKE_COMPILER_IS_GNUCC)
         ${EIGEN_PATCH_COMMAND} && patch -Nd
         ${SOURCE_DIR}/Eigen/src/Core/arch/SSE/ < ${complex_header})
   endif()
+  if(WITH_MUSA)
+    file(
+      TO_NATIVE_PATH
+      ${PADDLE_SOURCE_DIR}/patches/eigen/Eigen_src_Core_util_ConfigureVectorization.h.patch
+      configure_vectorization_header)
+    set(EIGEN_PATCH_COMMAND
+        ${EIGEN_PATCH_COMMAND} && patch -Nd ${SOURCE_DIR}/Eigen/src/Core/util/
+        < ${configure_vectorization_header})
+    file(TO_NATIVE_PATH
+         ${PADDLE_SOURCE_DIR}/patches/eigen/Eigen_src_Core_util_Macros.h.patch
+         util_macros_header)
+    set(EIGEN_PATCH_COMMAND
+        ${EIGEN_PATCH_COMMAND} && patch -Nd ${SOURCE_DIR}/Eigen/src/Core/util/
+        < ${util_macros_header})
+    file(TO_NATIVE_PATH
+         ${PADDLE_SOURCE_DIR}/patches/eigen/Eigen_src_Core_util_Meta.h.patch
+         meta_header)
+    set(EIGEN_PATCH_COMMAND ${EIGEN_PATCH_COMMAND} && patch -Nd
+                            ${SOURCE_DIR}/Eigen/src/Core/util/ < ${meta_header})
+    file(TO_NATIVE_PATH
+         ${PADDLE_SOURCE_DIR}/patches/eigen/unsupported_Eigen_CXX11_Tensor.patch
+         cxx11_tensor)
+    set(EIGEN_PATCH_COMMAND
+        ${EIGEN_PATCH_COMMAND} && patch -Nd
+        ${SOURCE_DIR}/unsupported/Eigen/CXX11/ < ${cxx11_tensor})
+    file(
+      TO_NATIVE_PATH
+      ${PADDLE_SOURCE_DIR}/patches/eigen/unsupported_Eigen_CXX11_src_Tensor_TensorContractionGpu.h.patch
+      tensor_contraction_gpu_header)
+    set(EIGEN_PATCH_COMMAND
+        ${EIGEN_PATCH_COMMAND} && patch -Nd
+        ${SOURCE_DIR}/unsupported/Eigen/CXX11/src/Tensor/ <
+        ${tensor_contraction_gpu_header})
+    file(
+      TO_NATIVE_PATH
+      ${PADDLE_SOURCE_DIR}/patches/eigen/unsupported_Eigen_CXX11_src_Tensor_TensorDeviceDefault.h.patch
+      tensor_device_default_header)
+    set(EIGEN_PATCH_COMMAND
+        ${EIGEN_PATCH_COMMAND} && patch -Nd
+        ${SOURCE_DIR}/unsupported/Eigen/CXX11/src/Tensor/ <
+        ${tensor_device_default_header})
+    file(
+      TO_NATIVE_PATH
+      ${PADDLE_SOURCE_DIR}/patches/eigen/unsupported_Eigen_CXX11_src_Tensor_TensorGpuHipCudaDefines.h.patch
+      tensor_gpu_hip_cuda_defines_header)
+    set(EIGEN_PATCH_COMMAND
+        ${EIGEN_PATCH_COMMAND} && patch -Nd
+        ${SOURCE_DIR}/unsupported/Eigen/CXX11/src/Tensor/ <
+        ${tensor_gpu_hip_cuda_defines_header})
+    file(
+      TO_NATIVE_PATH
+      ${PADDLE_SOURCE_DIR}/patches/eigen/unsupported_Eigen_CXX11_src_Tensor_TensorReduction.h.patch
+      tensor_reduction_header)
+    set(EIGEN_PATCH_COMMAND
+        ${EIGEN_PATCH_COMMAND} && patch -Nd
+        ${SOURCE_DIR}/unsupported/Eigen/CXX11/src/Tensor/ <
+        ${tensor_reduction_header})
+  endif()
 endif()
 
 set(EIGEN_INCLUDE_DIR ${SOURCE_DIR})

diff --git a/patches/eigen/Eigen_src_Core_util_ConfigureVectorization.h.patch b/patches/eigen/Eigen_src_Core_util_ConfigureVectorization.h.patch
@@ -0,0 +1,21 @@
+diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h
+index af4e69623..6944be650 100644
+--- a/Eigen/src/Core/util/ConfigureVectorization.h
++++ b/Eigen/src/Core/util/ConfigureVectorization.h
+@@ -470,6 +470,16 @@
+   #include <hip/hip_fp16.h>
+ #endif
+
++#if defined EIGEN_MUSACC
++  #define EIGEN_VECTORIZE_GPU
++  #include <vector_types.h>
++  #define EIGEN_HAS_MUSA_FP16
++#endif
++
++#if defined(EIGEN_HAS_MUSA_FP16)
++  #include <musa_runtime_api.h>
++  #include <musa_fp16.h>
++#endif
+
+ /** \brief Namespace containing all symbols from the %Eigen library. */
+ namespace Eigen {
diff --git a/patches/eigen/Eigen_src_Core_util_Macros.h.patch b/patches/eigen/Eigen_src_Core_util_Macros.h.patch
@@ -0,0 +1,51 @@
+diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
+index bdc0de0ea..8ffbd4291 100644
+--- a/Eigen/src/Core/util/Macros.h
++++ b/Eigen/src/Core/util/Macros.h
+@@ -476,6 +476,19 @@
+   #define EIGEN_CUDA_SDK_VER 0
+ #endif
+
++#if defined(__MUSACC__) && !defined(EIGEN_NO_MUSA)
++  // Means the compiler is either nvcc or clang with CUDA enabled
++  #define EIGEN_MUSACC __MUSACC__
++#endif
++#if defined(EIGEN_MUSACC)
++#include <musa.h>
++#endif
++
++#if defined(__MUSA_ARCH__) && !defined(EIGEN_NO_MUSA)
++  // Means we are generating code for the device
++  #define EIGEN_MUSA_ARCH __MUSA_ARCH__
++#endif
++
+ #if defined(__HIPCC__) && !defined(EIGEN_NO_HIP)
+   // Means the compiler is HIPCC (analogous to EIGEN_CUDACC, but for HIP)
+   #define EIGEN_HIPCC __HIPCC__
+@@ -512,7 +525,7 @@
+
+ // Unify CUDA/HIPCC
+
+-#if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)
++#if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC) || defined(EIGEN_MUSACC)
+ //
+ // If either EIGEN_CUDACC or EIGEN_HIPCC is defined, then define EIGEN_GPUCC
+ //
+@@ -535,7 +548,7 @@
+ //
+ #endif
+
+-#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
++#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) || defined(EIGEN_MUSA_ARCH)
+ //
+ // If either EIGEN_CUDA_ARCH or EIGEN_HIP_DEVICE_COMPILE is defined, then define EIGEN_GPU_COMPILE_PHASE
+ //
+@@ -943,7 +956,7 @@
+ // GPU stuff
+
+ // Disable some features when compiling with GPU compilers (NVCC/clang-cuda/SYCL/HIPCC)
+-#if defined(EIGEN_CUDACC) || defined(SYCL_DEVICE_ONLY) || defined(EIGEN_HIPCC)
++#if defined(EIGEN_CUDACC) || defined(SYCL_DEVICE_ONLY) || defined(EIGEN_HIPCC) || defined(EIGEN_MUSACC)
+   // Do not try asserts on device code
+   #ifndef EIGEN_NO_DEBUG
+   #define EIGEN_NO_DEBUG
diff --git a/patches/eigen/Eigen_src_Core_util_Meta.h.patch b/patches/eigen/Eigen_src_Core_util_Meta.h.patch
@@ -0,0 +1,58 @@
+diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
+index cad57c3a4..785b2a7a9 100755
+--- a/Eigen/src/Core/util/Meta.h
++++ b/Eigen/src/Core/util/Meta.h
+@@ -15,7 +15,7 @@
+
+  #include <cfloat>
+
+- #if defined(EIGEN_CUDA_ARCH)
++ #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_MUSA_ARCH)
+   #include <math_constants.h>
+  #endif
+
+@@ -300,6 +300,8 @@ template<> struct numeric_limits<float>
+   static float (max)() {
+   #if defined(EIGEN_CUDA_ARCH)
+     return CUDART_MAX_NORMAL_F;
++  #elif defined(EIGEN_MUSA_ARCH)
++    return MUSART_MAX_NORMAL_F;
+   #else
+     return HIPRT_MAX_NORMAL_F;
+   #endif
+@@ -310,6 +312,8 @@ template<> struct numeric_limits<float>
+   static float infinity() {
+   #if defined(EIGEN_CUDA_ARCH)
+     return CUDART_INF_F;
++  #elif defined(EIGEN_MUSA_ARCH)
++    return MUSART_INF_F;
+   #else
+     return HIPRT_INF_F;
+   #endif
+@@ -318,6 +322,8 @@ template<> struct numeric_limits<float>
+   static float quiet_NaN() {
+   #if defined(EIGEN_CUDA_ARCH)
+     return CUDART_NAN_F;
++  #elif defined(EIGEN_MUSA_ARCH)
++    return MUSART_NAN_F;
+   #else
+     return HIPRT_NAN_F;
+   #endif
+@@ -335,6 +341,8 @@ template<> struct numeric_limits<double>
+   static double infinity() {
+   #if defined(EIGEN_CUDA_ARCH)
+     return CUDART_INF;
++  #elif defined(EIGEN_MUSA_ARCH)
++    return MUSART_INF;
+   #else
+     return HIPRT_INF;
+   #endif
+@@ -343,6 +351,8 @@ template<> struct numeric_limits<double>
+   static double quiet_NaN() {
+   #if defined(EIGEN_CUDA_ARCH)
+     return CUDART_NAN;
++  #elif defined(EIGEN_MUSA_ARCH)
++    return MUSART_NAN;
+   #else
+     return HIPRT_NAN;
+   #endif
diff --git a/patches/eigen/unsupported_Eigen_CXX11_Tensor.patch b/patches/eigen/unsupported_Eigen_CXX11_Tensor.patch
@@ -0,0 +1,13 @@
+diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
+index d73c6008d..73c02cc50 100644
+--- a/unsupported/Eigen/CXX11/Tensor
++++ b/unsupported/Eigen/CXX11/Tensor
+@@ -57,6 +57,8 @@
+   #include <iostream>
+   #if defined(EIGEN_USE_HIP)
+     #include <hip/hip_runtime.h>
++  #elif defined(EIGEN_USE_MUSA)
++    #include <musa_runtime.h>
+   #else
+     #include <cuda_runtime.h>
+   #endif
diff --git a/patches/eigen/unsupported_Eigen_CXX11_src_Tensor_TensorContractionGpu.h.patch b/patches/eigen/unsupported_Eigen_CXX11_src_Tensor_TensorContractionGpu.h.patch
@@ -0,0 +1,22 @@
+diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
+index bb990b378..07f93ab18 100644
+--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
++++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionGpu.h
+@@ -621,7 +621,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh
+       x1 = rhs_pf0.x;
+       x2 = rhs_pf0.z;
+     }
+-    #if defined(EIGEN_HIPCC) || (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000)
++    #if !defined(EIGEN_MUSACC) && (defined(EIGEN_HIPCC) || (defined(EIGEN_CUDA_SDK_VER) && EIGEN_CUDA_SDK_VER < 90000))
+     x1 = __shfl_xor(x1, 4);
+     x2 = __shfl_xor(x2, 4);
+     #else
+@@ -1399,6 +1399,8 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
+
+ #if defined(EIGEN_USE_HIP)
+     setGpuSharedMemConfig(hipSharedMemBankSizeEightByte);
++#elif defined(EIGEN_USE_MUSA)
++    setGpuSharedMemConfig(musaSharedMemBankSizeEightByte);
+ #else
+     setGpuSharedMemConfig(cudaSharedMemBankSizeEightByte);
+ #endif
diff --git a/patches/eigen/unsupported_Eigen_CXX11_src_Tensor_TensorDeviceDefault.h.patch b/patches/eigen/unsupported_Eigen_CXX11_src_Tensor_TensorDeviceDefault.h.patch
@@ -0,0 +1,15 @@
+diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+index 46b9d3ab2..3bef5b621 100644
+--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
++++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+@@ -92,6 +92,10 @@ struct DefaultDevice {
+     // Running on a HIP device
+     // return 1 as major for HIP
+     return 1;
++#elif defined(EIGEN_MUSA_ARCH)
++    // Running on a MUSA device
++    // return 1 as major for MUSA
++    return 1;
+ #else
+     // Running on a CUDA device
+     return EIGEN_CUDA_ARCH / 100;
diff --git a/patches/eigen/unsupported_Eigen_CXX11_src_Tensor_TensorGpuHipCudaDefines.h.patch b/patches/eigen/unsupported_Eigen_CXX11_src_Tensor_TensorGpuHipCudaDefines.h.patch
@@ -0,0 +1,40 @@
+diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
+index cb53ce298..19dc119a9 100644
+--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
++++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
+@@ -52,6 +52,35 @@
+ #define gpuDeviceSynchronize hipDeviceSynchronize
+ #define gpuMemcpy hipMemcpy
+
++#elif defined(EIGEN_USE_MUSA)
++#define gpuStream_t musaStream_t
++#define gpuDeviceProp_t musaDeviceProp
++#define gpuError_t musaError_t
++#define gpuSuccess musaSuccess
++#define gpuErrorNotReady musaErrorNotReady
++#define gpuGetDeviceCount musaGetDeviceCount
++#define gpuGetLastError musaGetLastError
++#define gpuPeekAtLastError musaPeekAtLastError
++#define gpuGetErrorName musaGetErrorName
++#define gpuGetErrorString musaGetErrorString
++#define gpuGetDeviceProperties musaGetDeviceProperties
++#define gpuStreamDefault musaStreamDefault
++#define gpuGetDevice musaGetDevice
++#define gpuSetDevice musaSetDevice
++#define gpuMalloc musaMalloc
++#define gpuFree musaFree
++#define gpuMemsetAsync musaMemsetAsync
++#define gpuMemcpyAsync musaMemcpyAsync
++#define gpuMemcpyDeviceToDevice musaMemcpyDeviceToDevice
++#define gpuMemcpyDeviceToHost musaMemcpyDeviceToHost
++#define gpuMemcpyHostToDevice musaMemcpyHostToDevice
++#define gpuStreamQuery musaStreamQuery
++#define gpuSharedMemConfig musaSharedMemConfig
++#define gpuDeviceSetSharedMemConfig musaDeviceSetSharedMemConfig
++#define gpuStreamSynchronize musaStreamSynchronize
++#define gpuDeviceSynchronize musaDeviceSynchronize
++#define gpuMemcpy musaMemcpy
++
+ #else
+
+ #define gpuStream_t cudaStream_t
diff --git a/patches/eigen/unsupported_Eigen_CXX11_src_Tensor_TensorReduction.h.patch b/patches/eigen/unsupported_Eigen_CXX11_src_Tensor_TensorReduction.h.patch
@@ -0,0 +1,13 @@
+diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+index 0a65591e6..74679b700 100644
+--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
++++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+@@ -14,7 +14,7 @@
+ // clang is incompatible with the CUDA syntax wrt making a kernel a class friend,
+ // so we'll use a macro to make clang happy.
+ #ifndef KERNEL_FRIEND
+-#if defined(__clang__) && (defined(__CUDA__) || defined(__HIP__))
++#if defined(__clang__) && (defined(__CUDA__) || defined(__HIP__) || defined(__MUSA__))
+ #define KERNEL_FRIEND friend __global__ EIGEN_HIP_LAUNCH_BOUNDS_1024
+ #else
+ #define KERNEL_FRIEND friend
diff --git a/third_party/eigen3 b/third_party/eigen3