-
Notifications
You must be signed in to change notification settings - Fork 1
/
README
80 lines (62 loc) · 2.87 KB
/
README
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
Compiling
First, install the PAPI library.
Then set the PAPI library directory and library name when compiling; see the
attached Makefile.
Also set the NVML library in the Makefile.
Read the comments in the "high_resolution_power.h" file for usage instructions.
Here are some sample usages.
This is the kernel-launching function (call_gpu_functions) for Jacobi1D:
//-------------------------------------------------------------------------------------------------------------------------------------
/*
 * Kernel-launch routine for the Jacobi1D sample.
 *
 * Issues one kernel launch for every kernelCall value from 1 through
 * totalKernelCalls, picking one of the four jacobiKernel_8_type* variants
 * per launch, and counts in temp1..temp4 how many times each variant was
 * launched. Calls cudaDeviceSynchronize() once after the last launch.
 *
 * The argument is unused; the function always returns NULL.
 */
void *call_gpu_functions(void *nothing) {
    // Start every per-variant launch counter from zero.
    temp1 = 0;
    temp2 = 0;
    temp3 = 0;
    temp4 = 0;

    // The block size is the same for every launch.
    dim3 dimBlock(BlockWidth);

    for (kernelCall = 1; kernelCall <= totalKernelCalls; ++kernelCall) {
        // The grid size is recomputed for each launch.
        width = computeWidth(S, T, kernelCall*TileSize - 1);
        dim3 dimGrid(width);

        // Launches at or beyond S/TileSize + 1 use the type3/type4 kernels.
        const bool lateCall = (kernelCall >= S/TileSize + 1);

        if (lateCall && (totalKernelCalls - kernelCall)%2 == 0) {
            // Even number of launches still to come after this one.
            jacobiKernel_8_type3<<<dimGrid, dimBlock>>>(device_A, device_temp1, device_temp2,
                                                        S, T, kernelCall, width, totalKernelCalls);
            cudaCheckKernel();
            temp3++;
        } else if (lateCall) {
            jacobiKernel_8_type4<<<dimGrid, dimBlock>>>(device_A, device_temp1, device_temp2,
                                                        S, T, kernelCall, width, totalKernelCalls);
            cudaCheckKernel();
            temp4++;
        } else if (kernelCall%2 == 1 && kernelCall <= 2*T/TileSize - 1) {
            // Odd-numbered launch that is still within 2*T/TileSize - 1.
            jacobiKernel_8_type1<<<dimGrid, dimBlock>>>(device_A, device_temp1, device_temp2,
                                                        S, T, kernelCall, width, totalKernelCalls);
            cudaCheckKernel();
            temp1++;
        } else {
            jacobiKernel_8_type2<<<dimGrid, dimBlock>>>(device_A, device_temp1, device_temp2,
                                                        S, T, kernelCall, width, totalKernelCalls);
            cudaCheckKernel();
            temp2++;
        }
    }

    // Synchronize once, after all launches have been issued.
    cudaDeviceSynchronize();
    return NULL;
}
//---------------------------------------------------------------------------------------------------------------------------------------
This is the kernel-data reset function (gpu_data_reset) for Smith-Waterman:
//-------------------------------------------------------------------------------------------------------------------------------------
/*
 * Data-reset routine for the Smith-Waterman sample.
 *
 * Zero-fills the dABH, dABEF and dCorner buffers with cudaMemset, passing
 * each call through CUDA_CHECK_RETURN.
 *
 * The argument is unused; the function always returns NULL.
 */
void *gpu_data_reset(void *nothing) {
    // dABH and dABEF are cleared over the same number of bytes.
    const size_t abBytes = sizeof(int)*(lenA+lenB);
    const size_t cornerBytes = sizeof(int)*3*tilesPerBlock;

    CUDA_CHECK_RETURN(cudaMemset((void *)dABH, 0, abBytes));
    CUDA_CHECK_RETURN(cudaMemset((void *)dABEF, 0, abBytes));
    CUDA_CHECK_RETURN(cudaMemset((void *)dCorner, 0, cornerBytes));

    return NULL;
}
//-------------------------------------------------------------------------------------------------------------------------------------
Now, instead of calling the actual CUDA kernels directly, you can call "call_gpu_functions".
To measure time:
long long elapsed1 = get_exec_time_in_nanoseconds(call_gpu_functions, gpu_data_reset);
To profile power:
high_resolution_power_profile(call_gpu_functions, gpu_data_reset);
The output power profile will be written to stdout.