forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 73
Closed
Description
Hi,
I was investigating how to invoke a host function from inside a kernel. I came across the __ockl_call_host_function
binding, which seems to go through the same internal function printf
goes through. I have some questions regarding this binding's usage:
- Is it possible to invoke this function from HIP? If so, how do I supply the function's address and the arguments? Why are there 6 args required here? I have a basic working example that I've been trying to compile in HIP. Passing
myFunc
(casted or not) directly to the first argument doesn't work. Also please provide the correct command of compiling the following HIP file and the device offload link flags.
#include <stdio.h>
#include <algorithm>
#include <stdlib.h>
#include <iostream>
#include "hip/hip_runtime.h"
#include <vector>
#include <random>
#define HIP_ASSERT(x) (assert((x)==hipSuccess))
#define NUM 1024
#define THREADS_PER_BLOCK 256
template<typename T>
void initialize_array(std::vector<T> vec) {
std::random_device rd;
std::mt19937 mt(rd());
std::uniform_real_distribution<double> dist(0.0, 100.0);
for (auto el: vec) {
el = 0;
}
}
extern "C" __device__ long2
__ockl_call_host_function(ulong fptr, ulong arg0, ulong arg1, ulong arg2,
ulong arg3, ulong arg4, ulong arg5, ulong arg6);
template<typename T>
int vector_verify(const std::vector<T> &a, const std::vector<T> &b, std::vector<T> &c) {
int disp = 0;
for (int i = 0; i < a.size(); i++) {
if (c[i] != a[i] + b[i])
disp++;
}
return disp;
}
void myFunc() {
std::cout << "Called by device to run on host\n";
}
template<typename T>
__global__ void k_vector_mult(T* __restrict__ a) {
// int i = blockDim.x * blockIdx.x + threadIdx.x;
// if ( i < vec_size)
// printf("%lx\n", a);
__ockl_call_host_function(0, 0, 0, 0, 0, 0, 0, 0);
a[0] = a[0] + 42;
}
template<typename T>
void hipVectorCopy(T* &d_data, std::vector<T> &h_data, hipMemcpyKind memcpyKind) {
size_t vec_size = sizeof(h_data.data());
if (memcpyKind == hipMemcpyHostToDevice) {
HIP_ASSERT(hipMalloc((void**)&d_data, vec_size));
HIP_ASSERT(hipMemcpy(d_data, h_data.data(), vec_size, hipMemcpyHostToDevice));
}
else if (memcpyKind == hipMemcpyDeviceToHost) {
HIP_ASSERT(hipMemcpy(h_data.data(), d_data, vec_size, hipMemcpyDeviceToHost));
}
else throw std::invalid_argument("Unsupported memcpy kind for hipVectorCopy");
}
int main() {
// Host allocated pointers
std::vector<int> h_A_int(NUM);
// Pointers for device global memory
int* d_A_int;
std::cout << "Initializing vectors.\n";
initialize_array(h_A_int);
std::cout << "Initialized vectors." << std::endl;
std::cout << "Copying vectors to device.\n";
hipVectorCopy(d_A_int, h_A_int, hipMemcpyHostToDevice);
std::cout << "Copied vectors to device.\n";
dim3 grid_dims(NUM / THREADS_PER_BLOCK + (NUM % THREADS_PER_BLOCK) ? 1 : 0);
hipLaunchKernelGGL(k_vector_mult, 1,
1,
0, 0,
d_A_int);
auto err = hipDeviceSynchronize( );
if ( hipSuccess != err ) {
std::cerr << "Kernel launch failed!. Error code: " << err << std::endl;
}
hipVectorCopy(d_A_int, h_A_int, hipMemcpyDeviceToHost);
std::cout << h_A_int[0] << std::endl;
HIP_ASSERT(hipFree(d_A_int));
return 0;
}
- Is it possible to perform HSA tasks (loading and freezing executables) inside the called host functions? Are there any implications I should be aware of?
Tagging @b-sumner @arsenm @bwelton
Thanks in advance!
Metadata
Metadata
Assignees
Labels
No labels