Skip to content

Commit

Permalink
[MT484] fix(phi): fix musa version check problem (PaddlePaddle#56)
Browse files Browse the repository at this point in the history
  • Loading branch information
caizhi-mt authored and mt-robot committed Sep 8, 2023
1 parent f21a50d commit a2450cd
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 19 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

```bash
apt-get install ccache
pip install -r requirements.txt
pip install -r python/requirements.txt
```

#### Set Important Environment Variables
Expand Down Expand Up @@ -76,7 +76,8 @@ docker run -it --privileged --name=paddle_musa_dev --env MTHREADS_VISIBLE_DEVICE

| Docker Tag | Description |
| ---- | --- |
| [**v0.1.4/latest**](https://sh-harbor.mthreads.com/harbor/projects/20/repositories/musa-paddle-dev/artifacts-tab) | musatoolkits-v1.4.2 (driver2.2.0 develop or newer)<br> mcc-20230823-daily <br> mudnn 20230823-daily <br> mccl_20230823-daily <br> muAlg_dev-20230823-daily <br> muRAND_dev1.0.0 <br> muSPARSE_dev0.1.0 <br> muThrust_dev-0.1.1 |
| [**v0.1.6/latest**](https://sh-harbor.mthreads.com/harbor/projects/20/repositories/musa-paddle-dev/artifacts-tab) | musatoolkits-20230903 (driver2.3.0 develop or newer)<br> mudnn 20230903-daily <br> mccl_20230903-daily <br> muAlg_dev-20230903-daily <br> muRAND_dev1.0.0 <br> muSPARSE_dev0.1.0 <br> muThrust_dev-0.1.1 |
| [**v0.1.4**](https://sh-harbor.mthreads.com/harbor/projects/20/repositories/musa-paddle-dev/artifacts-tab) | musatoolkits-v1.4.2 (driver2.2.0 develop or newer)<br> mcc-20230823-daily <br> mudnn 20230823-daily <br> mccl_20230823-daily <br> muAlg_dev-20230823-daily <br> muRAND_dev1.0.0 <br> muSPARSE_dev0.1.0 <br> muThrust_dev-0.1.1 |
| [**v0.1.3**](https://sh-harbor.mthreads.com/harbor/projects/20/repositories/musa-paddle-dev/artifacts-tab) | musatoolkits-v1.4.0 (ddk_1.4.0 develop or newer)<br> mcc-20230814-daily <br> mudnn v1.4.0 <br> mccl_rc1.1.0 <br> muAlg_dev-20230814-daily <br> muRAND_dev1.0.0 <br> muSPARSE_dev0.1.0 <br> muThrust_dev-0.1.1 |
| [**v0.1.2**](https://sh-harbor.mthreads.com/harbor/projects/20/repositories/musa-paddle-dev/artifacts-tab) | musatoolkits-v1.4.0 (ddk_1.4.0 develop or newer)<br> mcc-20230814-daily <br> mudnn v1.4.0 <br> mccl_rc1.1.0 <br> muAlg_dev-20230814-daily <br> muRAND_dev1.0.0 <br> muSPARSE_dev0.1.0 <br> muThrust_dev-0.1.1 |
| [**v0.1.1**](https://sh-harbor.mthreads.com/harbor/projects/20/repositories/musa-paddle-dev/artifacts-tab) | musatoolkits-v1.4.0 (ddk_1.4.0 develop or newer)<br> mudnn v1.4.0 <br> mccl_rc1.1.0 <br> muAlg_dev-0.1.1 <br> muRAND_dev1.0.0 <br> muSPARSE_dev0.1.0 <br> muThrust_dev-0.1.1 |
Expand Down
3 changes: 3 additions & 0 deletions cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,9 @@ function(cc_test_build TARGET_NAME)
endif()
if(WITH_MUSA)
target_link_libraries(${TARGET_NAME} ${MUSARTC_LIB})
# libtinfo.so, which libmusa.so depends on, is located in '/usr/lib/x86_64-linux-gnu/'
target_link_options(${TARGET_NAME} PRIVATE
-Wl,-rpath,/usr/lib/x86_64-linux-gnu/)
endif()
check_coverage_opt(${TARGET_NAME} ${cc_test_SRCS})
endif()
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/pybind/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ if(WITH_PYTHON)
add_executable(eager_legacy_op_function_generator
eager_legacy_op_function_generator.cc)
if(WITH_MUSA)
# libtinfo.so, which libmusa.so depends on, is located in '/usr/lib/x86_64-linux-gnu/'
target_link_options(eager_legacy_op_function_generator PRIVATE
-Wl,-rpath,/usr/lib/x86_64-linux-gnu/)
endif()
Expand All @@ -304,6 +305,7 @@ if(WITH_PYTHON)
if(NOT WIN32)
add_executable(kernel_signature_generator kernel_signature_generator.cc)
if(WITH_MUSA)
# libtinfo.so, which libmusa.so depends on, is located in '/usr/lib/x86_64-linux-gnu/'
target_link_options(kernel_signature_generator PRIVATE
-Wl,-rpath,/usr/lib/x86_64-linux-gnu/)
endif()
Expand Down
40 changes: 26 additions & 14 deletions paddle/phi/backends/gpu/gpu_resources.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,17 @@ void InitGpuProperties(Place place,
}
#endif

#ifdef PADDLE_WITH_MUSA
LOG_FIRST_N(INFO, 1) << "Please NOTE: device: "
<< static_cast<int>(place.device)
<< ", GPU Compute Capability: "
<< *compute_capability / 10 << "."
<< *compute_capability % 10
<< ", Driver API Version: " << *driver_version / 10000
<< "." << (*driver_version % 10000) / 100
<< ", Runtime API Version: " << *runtime_version / 10000
<< "." << (*runtime_version % 10000) / 100;
#else
// TODO(wilber): glog may be replaced in the future?
LOG_FIRST_N(WARNING, 1) << "Please NOTE: device: "
<< static_cast<int>(place.device)
Expand All @@ -134,6 +145,7 @@ void InitGpuProperties(Place place,
<< ", Runtime API Version: "
<< *runtime_version / 1000 << "."
<< (*runtime_version % 100) / 10;
#endif
#ifdef PADDLE_WITH_HIP
size_t miopen_major, miopen_minor, miopen_patch;
PADDLE_ENFORCE_GPU_SUCCESS(
Expand All @@ -156,39 +168,39 @@ void InitGpuProperties(Place place,
// TODO(@caizhi): mudnnGetVersion is not supported for MUSA now.
// Requests have been submitted to Mudnn.
// size_t mudnn_dso_ver = dynload::mudnnGetVersion();
size_t mudnn_dso_ver = 1100;
LOG_FIRST_N(WARNING, 1) << "device: " << static_cast<int>(place.device)
<< ", muDNN Version: " << mudnn_dso_ver / 1000 << "."
<< (mudnn_dso_ver % 1000) / 100 << ".";
size_t mudnn_dso_ver = 2300;
LOG_FIRST_N(INFO, 1) << "device: " << static_cast<int>(place.device)
<< ", muDNN Version: " << mudnn_dso_ver / 1000 << "."
<< (mudnn_dso_ver % 1000) / 100 << ".";

// Check MUSA/MUDNN version compatibility
auto local_musa_version =
(*driver_version / 1000) * 10 + (*driver_version % 100) / 10;
auto compile_musa_version =
(MUSA_VERSION / 1000) * 10 + (MUSA_VERSION % 100) / 10;
auto local_musa_version = *driver_version;
int compile_musa_version = MUSA_VERSION;
#if defined(__linux__)
PADDLE_ENFORCE_EQ(
(local_musa_version / 10 < compile_musa_version / 10) &&
(local_musa_version / 100 < compile_musa_version / 100) &&
(mudnn_dso_ver / 1000 < MUDNN_VERSION / 1000),
false,
phi::errors::InvalidArgument(
"The installed Paddle is compiled with MUDA%d/muDNN%d,"
"The installed Paddle is compiled with MUSA%d/muDNN%d,"
"but MUSA/muDNN version in your machine is MUSA%d/muDNN%d. "
"which will cause serious incompatible bug. "
"Please recompile or reinstall Paddle with compatible MUSA/muDNN "
"version.",
compile_musa_version / 10,
compile_musa_version / 10000,
MUDNN_VERSION / 1000,
local_musa_version / 10,
local_musa_version / 10000,
mudnn_dso_ver / 1000));
#endif
if (local_musa_version < compile_musa_version) {
LOG_FIRST_N(WARNING, 1)
<< "WARNING: device: " << static_cast<int>(place.device)
<< ". The installed Paddle is compiled with MUSA "
<< compile_musa_version / 10 << "." << compile_musa_version % 10
<< compile_musa_version / 10000 << "."
<< (compile_musa_version % 1000) / 100
<< ", but MUSA runtime version in your machine is "
<< local_musa_version / 10 << "." << local_musa_version % 10
<< local_musa_version / 10000 << "."
<< (local_musa_version % 1000) / 100
<< ", which may cause serious incompatible bug. "
<< "Please recompile or reinstall Paddle with compatible MUSA "
"version.";
Expand Down
8 changes: 5 additions & 3 deletions paddle/phi/backends/gpu/musa/musa_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ int DnnVersion() {
if (!dynload::HasCUDNN()) return -1;
// TODO(@caizhi): mudnnGetVersion is not supported now.
// version info will be returned from mudnnGetVersion later.
const int version_major = 1;
const int version_minor = 1;
const int version_major = 2;
const int version_minor = 3;
const int version_patch = 0;
return version_major * 1000 + version_minor * 100 + version_patch;
}
Expand Down Expand Up @@ -99,7 +99,7 @@ int GetGPUComputeCapability(int id) {

PADDLE_ENFORCE_GPU_SUCCESS(major_error_code);
PADDLE_ENFORCE_GPU_SUCCESS(minor_error_code);
return major * 100 + minor;
return major * 10 + minor;
}

int GetGPURuntimeVersion(int id) {
Expand All @@ -111,6 +111,7 @@ int GetGPURuntimeVersion(int id) {
id,
GetGPUDeviceCount()));
int runtime_version = 0;
// Note: runtime_version = MAJOR * 10000 + MINOR * 100 + PATCH
PADDLE_ENFORCE_GPU_SUCCESS(musaRuntimeGetVersion(&runtime_version));
return runtime_version;
}
Expand All @@ -124,6 +125,7 @@ int GetGPUDriverVersion(int id) {
id,
GetGPUDeviceCount()));
int driver_version = 0;
// Note: driver_version = MAJOR * 10000 + MINOR * 100 + PATCH
PADDLE_ENFORCE_GPU_SUCCESS(musaDriverGetVersion(&driver_version));
return driver_version;
}
Expand Down
2 changes: 2 additions & 0 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ set(PY_FILES paddle/__init__.py ${UTILS_PY_FILES} ${FLUID_PY_FILES})

if(WITH_GPU)
set(PACKAGE_NAME "paddlepaddle-gpu")
elseif(WITH_MUSA)
set(PACKAGE_NAME "paddlepaddle-musa")
elseif(WITH_ROCM)
set(PACKAGE_NAME "paddlepaddle-rocm")
elseif(WITH_XPU)
Expand Down
3 changes: 3 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ if(${len} GREATER_EQUAL 1)
endif()
if(WITH_MUSA)
target_link_libraries(${test_name} ${MUSARTC_LIB})
# libtinfo.so, which libmusa.so depends on, is located in '/usr/lib/x86_64-linux-gnu/'
target_link_options(${test_name} PRIVATE
-Wl,-rpath,/usr/lib/x86_64-linux-gnu/)
endif()
if(APPLE)
target_link_libraries(
Expand Down

0 comments on commit a2450cd

Please sign in to comment.