Docs: Refactor hip driver API example code

ROCm · Sep 30, 2024 · fd4530d · fd4530d
1 parent ce89adc
commit fd4530d
Showing 1 changed file with 39 additions and 47 deletions.
diff --git a/docs/how-to/hip_porting_driver_api.rst b/docs/how-to/hip_porting_driver_api.rst
@@ -235,77 +235,69 @@ The sample below shows how to use ``hipModuleGetFunction``.
 
 .. code-block:: cpp
 
- #include <hip_runtime.h>
- #include <hip_runtime_api.h>
+ #include <hip/hip_runtime.h>
+ #include <hip/hip_runtime_api.h>
 
- #include <fstream>
- #include <iostream>
  #include <vector>
 
- #define LEN 64
- #define SIZE LEN << 2
-
- #ifdef __HIP_PLATFORM_AMD__
- #define fileName "vcpy_isa.co"
- #endif
-
- #ifdef __HIP_PLATFORM_NVIDIA__
- #define fileName "vcpy_isa.ptx"
- #endif
-
- #define kernel_name "hello_world"
-
  int main() {
- float *A, *B;
- hipDeviceptr_t Ad, Bd;
- A = new float[LEN];
- B = new float[LEN];
 
- for (uint32_t i = 0; i < LEN; i++) {
- A[i] = i * 1.0f;
- B[i] = 0.0f;
- std::cout << A[i] << " " << B[i] << std::endl;
- }
+ size_t elements = 64*1024;
+ size_t size_bytes = elements * sizeof(float);
 
+ std::vector<float> A(elements), B(elements);
+
+ // On NVIDIA platforms the driver runtime needs to be initialized
  #ifdef __HIP_PLATFORM_NVIDIA__
  hipInit(0);
  hipDevice_t device;
  hipCtx_t context;
- hipDeviceGet(&device, 0);
- hipCtxCreate(&context, 0, device);
+ HIPCHECK(hipDeviceGet(&device, 0));
+ HIPCHECK(hipCtxCreate(&context, 0, device));
  #endif
 
- hipMalloc((void **)&Ad, SIZE);
- hipMalloc((void **)&Bd, SIZE);
+ // Allocate device memory
+ hipDeviceptr_t d_A, d_B;
+ HIPCHECK(hipMalloc(&d_A, size_bytes));
+ HIPCHECK(hipMalloc(&d_B, size_bytes));
+
+ // Copy data to device
+ HIPCHECK(hipMemcpyHtoD(d_A, A.data(), size_bytes));
+ HIPCHECK(hipMemcpyHtoD(d_B, B.data(), size_bytes));
 
- hipMemcpyHtoD(Ad, A, SIZE);
- hipMemcpyHtoD(Bd, B, SIZE);
+ // Load module
  hipModule_t Module;
+ // For AMD the module file has to contain architecture-specific object code
+ // For NVIDIA the module file has to contain PTX, found in e.g. "vcpy_isa.ptx"
+ HIPCHECK(hipModuleLoad(&Module, "vcpy_isa.co"));
+ // Get kernel function from the module via its name
  hipFunction_t Function;
- hipModuleLoad(&Module, fileName);
- hipModuleGetFunction(&Function, Module, kernel_name);
+ HIPCHECK(hipModuleGetFunction(&Function, Module, "hello_world"));
 
- std::vector<void *> argBuffer(2);
- memcpy(&argBuffer[0], &Ad, sizeof(void *));
- memcpy(&argBuffer[1], &Bd, sizeof(void *));
+ // Pack the kernel argument values (the device pointers themselves, not their host addresses) into a buffer
+ std::vector<hipDeviceptr_t> argBuffer{d_A, d_B};
+ size_t arg_size_bytes = argBuffer.size() * sizeof(hipDeviceptr_t);
 
- size_t size = argBuffer.size() * sizeof(void *);
+ // Create configuration passed to the kernel as arguments
+ void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, argBuffer.data(),
+ HIP_LAUNCH_PARAM_BUFFER_SIZE, &arg_size_bytes, HIP_LAUNCH_PARAM_END};
 
- void *config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &argBuffer[0],
-  HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END};
+ int threads_per_block = 128;
+ int blocks = (elements + threads_per_block - 1) / threads_per_block;
 
- hipModuleLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL,
-  (void **)&config);
+ // Actually launch kernel
+ HIPCHECK(hipModuleLaunchKernel(Function, blocks, 1, 1, threads_per_block, 1, 1, 0, 0, NULL, config));
 
- hipMemcpyDtoH(B, Bd, SIZE);
- for (uint32_t i = 0; i < LEN; i++) {
- std::cout << A[i] << " - " << B[i] << std::endl;
- }
+ HIPCHECK(hipMemcpyDtoH(A.data(), d_A, size_bytes));
+ HIPCHECK(hipMemcpyDtoH(B.data(), d_B, size_bytes));
 
  #ifdef __HIP_PLATFORM_NVIDIA__
- hipCtxDetach(context);
+ HIPCHECK(hipCtxDetach(context));
  #endif
 
+ HIPCHECK(hipFree(d_A));
+ HIPCHECK(hipFree(d_B));
+
  return 0;
  }