Skip to content

Commit

Permalink
Docs: Refactor hip driver API example code
Browse files Browse the repository at this point in the history
  • Loading branch information
MKKnorr authored and neon60 committed Sep 30, 2024
1 parent ce89adc commit fd4530d
Showing 1 changed file with 39 additions and 47 deletions.
86 changes: 39 additions & 47 deletions docs/how-to/hip_porting_driver_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -235,77 +235,69 @@ The sample below shows how to use ``hipModuleGetFunction``.

.. code-block:: cpp
#include <hip_runtime.h>
#include <hip_runtime_api.h>
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
#include <fstream>
#include <iostream>
#include <vector>
#define LEN 64
#define SIZE LEN << 2
#ifdef __HIP_PLATFORM_AMD__
#define fileName "vcpy_isa.co"
#endif
#ifdef __HIP_PLATFORM_NVIDIA__
#define fileName "vcpy_isa.ptx"
#endif
#define kernel_name "hello_world"
int main() {
float *A, *B;
hipDeviceptr_t Ad, Bd;
A = new float[LEN];
B = new float[LEN];
for (uint32_t i = 0; i < LEN; i++) {
A[i] = i * 1.0f;
B[i] = 0.0f;
std::cout << A[i] << " " << B[i] << std::endl;
}
size_t elements = 64*1024;
size_t size_bytes = elements * sizeof(float);
std::vector<float> A(elements), B(elements);
// On NVIDIA platforms the driver runtime needs to be initiated
#ifdef __HIP_PLATFORM_NVIDIA__
hipInit(0);
hipDevice_t device;
hipCtx_t context;
hipDeviceGet(&device, 0);
hipCtxCreate(&context, 0, device);
HIPCHECK(hipDeviceGet(&device, 0));
HIPCHECK(hipCtxCreate(&context, 0, device));
#endif
hipMalloc((void **)&Ad, SIZE);
hipMalloc((void **)&Bd, SIZE);
// Allocate device memory
hipDeviceptr_t d_A, d_B;
HIPCHECK(hipMalloc(&d_A, size_bytes));
HIPCHECK(hipMalloc(&d_B, size_bytes));
// Copy data to device
HIPCHECK(hipMemcpyHtoD(d_A, A.data(), size_bytes));
HIPCHECK(hipMemcpyHtoD(d_B, B.data(), size_bytes));
hipMemcpyHtoD(Ad, A, SIZE);
hipMemcpyHtoD(Bd, B, SIZE);
// Load module
hipModule_t Module;
// For AMD the module file has to contain architecture specific object codee
// For NVIDIA the module file has to contain PTX, found in e.g. "vcpy_isa.ptx"
HIPCHECK(hipModuleLoad(&Module, "vcpy_isa.co"));
// Get kernel function from the module via its name
hipFunction_t Function;
hipModuleLoad(&Module, fileName);
hipModuleGetFunction(&Function, Module, kernel_name);
HIPCHECK(hipModuleGetFunction(&Function, Module, "hello_world"));
std::vector<void *> argBuffer(2);
memcpy(&argBuffer[0], &Ad, sizeof(void *));
memcpy(&argBuffer[1], &Bd, sizeof(void *));
// Create buffer for kernel arguments
std::vector<void*> argBuffer{&d_A, &d_B};
size_t arg_size_bytes = argBuffer.size() * sizeof(void*);
size_t size = argBuffer.size() * sizeof(void *);
// Create configuration passed to the kernel as arguments
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, argBuffer.data(),
HIP_LAUNCH_PARAM_BUFFER_SIZE, &arg_size_bytes, HIP_LAUNCH_PARAM_END};
void *config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &argBuffer[0],
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END};
int threads_per_block = 128;
int blocks = (elements + threads_per_block - 1) / threads_per_block;
hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL,
(void **)&config);
// Actually launch kernel
HIPCHECK(hipModuleLaunchKernel(Function, blocks, 1, 1, threads_per_block, 1, 1, 0, 0, NULL, config));
hipMemcpyDtoH(B, Bd, SIZE);
for (uint32_t i = 0; i < LEN; i++) {
std::cout << A[i] << " - " << B[i] << std::endl;
}
HIPCHECK(hipMemcpyDtoH(A.data(), d_A, elements));
HIPCHECK(hipMemcpyDtoH(B.data(), d_B, elements));
#ifdef __HIP_PLATFORM_NVIDIA__
hipCtxDetach(context);
HIPCHECK(hipCtxDetach(context));
#endif
HIPCHECK(hipFree(d_A));
HIPCHECK(hipFree(d_B));
return 0;
}
Expand Down

0 comments on commit fd4530d

Please sign in to comment.