-
Notifications
You must be signed in to change notification settings - Fork 8
/
cudacommon.cu
118 lines (99 loc) · 3.47 KB
/
cudacommon.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/* When compiling the CUDA code, we do not want to include all of ecm-impl.h. */
#define _DO_NOT_INCLUDE_ECM_IMPL_H
#include "cudacommon.h"
#include "ecm-gpu.h"
#include <stdio.h>
#ifndef __CUDACC__
#error "This file should only be compiled with nvcc"
#endif
/* Empty kernel: it performs no work on the device.  It only exists so that
   the host has a global function to launch (see select_and_init_GPU); the
   first call to a global function initializes the device. */
__global__ void
Cuda_Init_Device (void)
{
}
/* Make a device current (if one is requested) and fetch its properties.

   device      index of the device to select with cudaSetDevice, or -1 to
               leave the current device untouched.
   deviceProp  output; filled by cudaGetDeviceProperties for the device
               reported by cudaGetDevice.

   Returns 1 on success.  On any CUDA runtime failure, a two-line diagnostic
   is written to stderr and 0 is returned. */
extern "C"
int
get_device_prop (int device, cudaDeviceProp *deviceProp)
{
  /* Step 1: honour an explicit device request. */
  if (device != -1)
    {
      const cudaError_t set_status = cudaSetDevice (device);
      if (set_status != cudaSuccess)
        {
          fprintf (stderr, "GPU: Error: Could not use device %d\n", device);
          fprintf (stderr, "GPU: Error msg: %s\n",
                   cudaGetErrorString (set_status));
          return 0;
        }
    }

  /* Step 2: ask the runtime which device is current. */
  int current_device = device;
  const cudaError_t get_status = cudaGetDevice (&current_device);
  if (get_status != cudaSuccess)
    {
      fprintf (stderr, "GPU: Error: no active device.\n");
      fprintf (stderr, "GPU: Error msg: %s\n",
               cudaGetErrorString (get_status));
      return 0;
    }

  /* Step 3: read the properties of that device into the caller's struct. */
  const cudaError_t prop_status =
    cudaGetDeviceProperties (deviceProp, current_device);
  if (prop_status != cudaSuccess)
    {
      fprintf (stderr, "GPU: Error while getting device's properties.\n");
      fprintf (stderr, "GPU: Error msg: %s\n",
               cudaGetErrorString (prop_status));
      return 0;
    }

  return 1;
}
/* Select a CUDA device, settle the number of curves, and launch an empty
   kernel so that the device gets initialized.

   device            index of the device to use, or -1 to use whichever
                     device the CUDA runtime reports as current.
   number_of_curves  in/out.  If 0 on entry, it is set to
                     2 * multiProcessorCount * ECM_GPU_CURVES_BY_BLOCK.
                     Otherwise, if it is not a multiple of
                     ECM_GPU_CURVES_BY_BLOCK, it is rounded up to the next
                     multiple.
   verbose           if non-zero, print the device description (stdout) and
                     any adjustment of number_of_curves (stderr).

   Returns 0 on success and -1 if the device could not be selected or
   queried.  Failures of the calls wrapped in cuda_check are handled by
   cuda_check itself (defined outside this file). */
extern "C"
int
select_and_init_GPU (int device, unsigned int *number_of_curves, int verbose)
{
  cudaDeviceProp deviceProp;

  if (device != -1 && verbose)
    fprintf (stdout, "GPU: device %d is required.\n", device);

  if (!get_device_prop (device, &deviceProp))
    return -1;

  /* get_device_prop receives `device` by value, so when the caller passed -1
     our local copy is still -1.  Resolve the real index so that the message
     below names the device actually in use. */
  if (device == -1)
    {
      cudaError_t err = cudaGetDevice (&device);
      if (err != cudaSuccess)
        {
          fprintf (stderr, "GPU: Error: no active device.\n");
          fprintf (stderr, "GPU: Error msg: %s\n", cudaGetErrorString (err));
          return -1;
        }
    }

  if (verbose)
    {
      printf ("GPU: will use device %d: %s, compute capability %d.%d, %d MPs.\n"
              "GPU: maxSharedPerBlock = %zu maxThreadsPerBlock = %d "
              "maxRegsPerBlock = %d\n", device, deviceProp.name,
              deviceProp.major, deviceProp.minor,
              deviceProp.multiProcessorCount, deviceProp.sharedMemPerBlock,
              deviceProp.maxThreadsPerBlock, deviceProp.regsPerBlock);
    }

  if (*number_of_curves == 0) /* the caller lets us choose the number of curves */
    {
      /* Limited by the maximum number of threads per MP */
      unsigned int blocks_per_multiprocessor = 2;
      *number_of_curves = blocks_per_multiprocessor
                          * deviceProp.multiProcessorCount
                          * ECM_GPU_CURVES_BY_BLOCK;
    }
  else if (*number_of_curves % ECM_GPU_CURVES_BY_BLOCK != 0)
    {
      /* number_of_curves should be a multiple of ECM_GPU_CURVES_BY_BLOCK:
         round up to the next one. */
      *number_of_curves = (*number_of_curves / ECM_GPU_CURVES_BY_BLOCK + 1) *
                          ECM_GPU_CURVES_BY_BLOCK;
      if (verbose)
        fprintf (stderr, "GPU: the requested number of curves has been "
                         "modified to %u\n", *number_of_curves);
    }

  /* The first call to a global function initializes the device. */
  cuda_check (cudaSetDeviceFlags (cudaDeviceScheduleBlockingSync));
  Cuda_Init_Device<<<1, 1>>> ();
  /* A kernel launch returns no status; launch errors are read back here. */
  cuda_check (cudaGetLastError ());

  return 0;
}
/* Print the attributes of a kernel, as reported by cudaFuncGetAttributes.

   func     pointer identifying the function to query; passed unchanged to
            cudaFuncGetAttributes.
   verbose  if 0, the function does nothing.

   On success the attributes are printed to stdout.  If the query fails, a
   warning is printed to stderr and the error recorded by the runtime is
   cleared, so that it is not picked up by a later cudaGetLastError() check
   belonging to an unrelated call. */
void
kernel_info (const void* func, int verbose)
{
  if (!verbose)
    return;

  struct cudaFuncAttributes kernelAttr;
  cudaError_t err = cudaFuncGetAttributes (&kernelAttr, func);
  if (err != cudaSuccess)
    {
      fprintf (stderr, "GPU: Warning: could not get kernel attributes: %s\n",
               cudaGetErrorString (err));
      /* cudaGetLastError() returns the last recorded error and resets it. */
      (void) cudaGetLastError ();
      return;
    }

  printf ("GPU: Using device code targeted for architecture compile_%d\n"
          "GPU: Ptx version is %d\nGPU: maxThreadsPerBlock = %d\n"
          "GPU: numRegsPerThread = %d sharedMemPerBlock = %zu bytes\n",
          kernelAttr.binaryVersion, kernelAttr.ptxVersion,
          kernelAttr.maxThreadsPerBlock, kernelAttr.numRegs,
          kernelAttr.sharedSizeBytes);
}