Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/ld preload #1

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cargo_demo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,13 @@
3. 云服务器上没有硬件。

另外,我试着`break cuGetProcAddress_v2`,也是类似的结果。

---

重新试一试LD_PRELOAD,需要的步骤:
1. 封装实际CUDA调用,需要修改test.c文件
2. 创建模拟和重定向函数,也需要修改test.c文件
3. 编译为动态库
4. 使用LD_PRELOAD加载动态库
5. 运行程序,测试效果

23 changes: 23 additions & 0 deletions cargo_demo/lib.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#include <stdio.h>
#include <cuda.h>

// Forward declarations: redirect_cuInit calls both helpers before their
// definitions appear later in this file.
CUresult mock_cuInit(unsigned int flags);
CUresult actual_cuInit(unsigned int flags);

// Redirect function: runs the mock first, then the real initialization path.
//   flags:   passed unchanged to both mock_cuInit and actual_cuInit.
//   returns: the CUresult produced by actual_cuInit; the mock's result is
//            intentionally discarded.
CUresult redirect_cuInit(unsigned int flags) {
    (void)mock_cuInit(flags);

    const CUresult status = actual_cuInit(flags);
    return status;
}

// mock_cuInit: stand-in for the CUDA call. It only logs the flags it received
// and reports success; it never calls cuInit itself.
//   flags:   value supplied by the caller; used only in the log line.
//   returns: always CUDA_SUCCESS.
CUresult mock_cuInit(unsigned int flags) {
    // Simulate the CUDA call.
    // %u matches the unsigned int argument; %d would misprint values above INT_MAX.
    printf("mock_cuInit called with flags %u\n", flags);
    return CUDA_SUCCESS;
}

// actual_cuInit: logs the flags and then forwards them to cuInit.
//   flags:   value passed through to cuInit unchanged.
//   returns: whatever cuInit(flags) returns.
CUresult actual_cuInit(unsigned int flags) {
    // %u matches the unsigned int argument; %d would misprint values above INT_MAX.
    printf("actual_cuInit called with flags %u\n", flags);
    return cuInit(flags);
}
Binary file added cargo_demo/libredirect.so
Binary file not shown.
Binary file modified cargo_demo/test
Binary file not shown.
38 changes: 30 additions & 8 deletions cargo_demo/test.c
Original file line number Diff line number Diff line change
@@ -1,31 +1,53 @@
#include <stdio.h>
#include <cuda.h>

// Function declarations: both helpers are called by redirect_cuInit before
// their definitions appear.
CUresult mock_cuInit(unsigned int flags);
CUresult actual_cuInit(unsigned int flags);

// Redirect function: runs the mock first, then the real initialization path.
//   flags:   passed unchanged to both mock_cuInit and actual_cuInit.
//   returns: the CUresult produced by actual_cuInit; the mock's result is
//            intentionally discarded.
CUresult redirect_cuInit(unsigned int flags) {
    (void)mock_cuInit(flags);

    const CUresult status = actual_cuInit(flags);
    return status;
}

// mock_cuInit: stand-in for the CUDA call. It only logs the flags it received
// and reports success; it never calls cuInit itself.
//   flags:   value supplied by the caller; used only in the log line.
//   returns: always CUDA_SUCCESS.
CUresult mock_cuInit(unsigned int flags) {
    // Simulate the CUDA call.
    // %u matches the unsigned int argument; %d would misprint values above INT_MAX.
    printf("mock_cuInit called with flags %u\n", flags);
    return CUDA_SUCCESS;
}

// actual_cuInit: logs the flags and then forwards them to cuInit.
//   flags:   value passed through to cuInit unchanged.
//   returns: whatever cuInit(flags) returns.
CUresult actual_cuInit(unsigned int flags) {
    // %u matches the unsigned int argument; %d would misprint values above INT_MAX.
    printf("actual_cuInit called with flags %u\n", flags);
    return cuInit(flags);
}

// Function-pointer type used for cuInitPtr: a function taking one
// unsigned int argument and returning a CUresult.
typedef CUresult (*cuInitFunc)(unsigned int);

int main() {
CUresult result;
cuInitFunc cuInitPtr;

int pi;
int dev;
int dev;
cuDeviceGet(&dev, 0);

char* cuInitStr = (char*)"cuInit";

result = cuGetProcAddress_v2(cuInitStr, (void **)&cuInitPtr, 0, 0, NULL);
if (result != CUDA_SUCCESS) {
printf("Error: cuGetProcAddress_v2 failed with error %d\n", result);
return -1;
// 修改 test.c 中 cuInitPtr 的获取和调用
result = cuGetProcAddress_v2((char*)"redirect_cuInit", (void **)&cuInitPtr, 0, 0, NULL);

if (result == CUDA_SUCCESS) {
cuInitPtr(0);
}

result = cuDeviceGetAttribute(&pi, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev);

if (result == CUDA_SUCCESS) {
result = cuInitPtr(0);
// result = cuInitPtr(0x1); // 添加 CUDA_IPC_ENABLE 标志
// result = cuInitPtr(0x2); // 添加 CUDA_SCHEDULE_AUTO 标志
// result = cuInitPtr(0x1 | 0x2); // 同时启用IPC和自动线程调度
}

if (result != CUDA_SUCCESS) {
Expand Down