-
Notifications
You must be signed in to change notification settings - Fork 0
/
cuda_utils.h
185 lines (157 loc) · 5.01 KB
/
cuda_utils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#pragma once
#include <vector>
#include <limits>
#include <cuda_runtime.h>
#include <helper_cuda.h>
template <typename T>
class device_vector;
template <typename T>
class device_ptr {
T *ptr_ = nullptr;
private:
device_ptr(T *ptr_d) : ptr_(ptr_d) {}
template <typename P>
friend device_ptr<P> make_device_ptr_from_raw_ptr(P *ptr_d);
friend class device_vector<T>;
public:
device_ptr() = default;
const T *dev() const {
return ptr_;
}
T* dev() {
return ptr_;
}
void free() {
checkCudaErrors(cudaFree(ptr_));
}
};
template <typename T>
class device_vector : public device_ptr<T> {
int size_ = 0;
private:
device_vector(T* ptr_d, int size) : device_ptr<T>(ptr_d), size_(size) {}
template <typename P>
friend device_vector<P> make_device_vector_from_raw_ptr(P *ptr_d, int size);
public:
int size() const { return size_;}
};
template <typename T>
class pinned_allocator {
public:
// type definitions
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
// rebind allocator to type U
template <class U>
struct rebind {
typedef pinned_allocator<U> other;
};
// return address of values
pointer address (reference value) const {
return &value;
}
const_pointer address (const_reference value) const {
return &value;
}
/* constructors and destructor
* - nothing to do because the allocator has no state
*/
pinned_allocator() throw() {
}
pinned_allocator(const pinned_allocator&) throw() {
}
template <class U>
pinned_allocator (const pinned_allocator<U>&) throw() {
}
~pinned_allocator() throw() {
}
// return maximum number of elements that can be allocated
size_type max_size () const throw() {
return std::numeric_limits<std::size_t>::max() / sizeof(T);
}
// allocate but don't initialize num elements of type T
pointer allocate (size_type num, const void* = 0) {
pointer ret;
checkCudaErrors(cudaMallocHost((void**)&ret, num * sizeof(T)));
return ret;
}
// initialize elements of allocated storage p with value value
void construct (pointer p, const T& value) {
// initialize memory with placement new
new((void*)p)T(value);
}
// destroy elements of initialized storage p
void destroy (pointer p) {
// destroy objects by calling their destructor
p->~T();
}
// deallocate storage p of deleted elements
void deallocate (pointer p, size_type num) {
checkCudaErrors(cudaFreeHost((void*)p));
}
};
// return that all specializations of this allocator are interchangeable
template <class T1, class T2>
bool operator== (const pinned_allocator<T1>&,
const pinned_allocator<T2>&) throw() {
return true;
}
template <class T1, class T2>
bool operator!= (const pinned_allocator<T1>&,
const pinned_allocator<T2>&) throw() {
return false;
}
/// Makes device pointer from other device pointer.
/// Long name is intentional, becuase this function should not be normally used,
/// because it can easily leed to device/host pointer bugs.
template <typename T>
device_ptr<T> make_device_ptr_from_raw_ptr(T* ptr_d) {
return device_ptr<T>(ptr_d);
}
/// Makes device vector from raw device pointer and size.
/// Long name is intentional, becuase this function should not be normally used,
/// because it can easily leed to device/host pointer bugs.
template <typename T>
device_vector<T> make_device_vector_from_raw_ptr(T* ptr_d, int size) {
return device_vector<T>(ptr_d, size);
}
template <typename T>
device_vector<T> make_device_vector(int size) {
T* array_d;
checkCudaErrors(cudaMalloc((void**)&array_d, sizeof(T) * size));
return make_device_vector_from_raw_ptr(array_d, size);
}
template <typename T, typename AllocType>
device_vector<T> make_device_vector(const std::vector<T, AllocType>& vec) {
device_vector<T> ptr = make_device_vector<T>(vec.size());
checkCudaErrors(cudaMemcpy(ptr.dev(), vec.data(), sizeof(T) * vec.size(),
cudaMemcpyHostToDevice));
return ptr;
}
template <typename T, typename AllocType>
void copy_to_vector(device_ptr<T> ptr, std::vector<T, AllocType> &vec, int size) {
if (vec.size() < size)
vec.resize(size);
checkCudaErrors(cudaMemcpy(vec.data(), ptr.dev(),
sizeof(T) * size,
cudaMemcpyDeviceToHost));
}
template <typename T, typename AllocType>
void copy_to_vector(device_vector<T> dev_vec, std::vector<T, AllocType> &vec) {
copy_to_vector(dev_vec, vec, dev_vec.size());
}
template <typename T, typename AllocType=std::allocator<T>>
std::vector<T, AllocType> make_vector(device_ptr<T> ptr, int size) {
std::vector<T, AllocType> result(size);
copy_to_vector(ptr, result, size);
return result;
}
template <typename T, typename AllocType=std::allocator<T>>
std::vector<T, AllocType> make_vector(device_vector<T> dev_vec) {
return make_vector<T, AllocType>(dev_vec, dev_vec.size());
}