Regards #291: Added support for untyped, owned memory regions:

* Added a `unique_region` class - non-templated
* `unique_region`s "decay" into `cuda::memory::region_t`s - so there's no need to reimplement half the world for them
* Added a missing variant of `cuda::memory::copy()`
* Added an example program based on vectorAdd, which uses unique_regions (I would have replaced vectorAdd with it - had it not required two lines of source more than before...)
* `region_t`s now have an `as_span` method, as well as an `operator span()`
* Removed some commented-out unique-pointer-related code
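As a rough sketch of how these pieces fit together (the individual calls are taken from the example and header added in this commit; treating a `unique_region` as a `region_t` via plain copy-initialization is inferred from the "decay" wording above rather than shown explicitly in the diff):

    #include <cuda/api.hpp>

    #include <vector>

    int main()
    {
        auto device = cuda::device::current::get();
        std::vector<float> h_data(1000, 1.0f);

        // An owned, untyped region of device-global memory; freed when d_buf is destroyed
        auto d_buf = cuda::memory::make_unique_region(device, h_data.size() * sizeof(float));

        // The "decay": a unique_region should be usable wherever a (non-owning)
        // cuda::memory::region_t is expected - inferred from the commit message
        cuda::memory::region_t raw = d_buf;
        (void) raw;

        // ... and the untyped region can be viewed as a typed span
        auto sp = d_buf.as_span<float>();

        // The added copy() variant accepts such spans
        cuda::memory::copy(sp, h_data.data());   // host-to-device
        cuda::memory::copy(h_data.data(), sp);   // device-to-host
    }

The point of the decay is that existing APIs taking a `cuda::memory::region_t` can accept a `unique_region` directly, without a parallel set of overloads.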
Showing 7 changed files with 477 additions and 5 deletions.
examples/modified_cuda_samples/vectorAdd_unique_regions/vectorAdd_unique_regions.cu (85 additions, 0 deletions)
@@ -0,0 +1,85 @@
/**
 * Derived from the nVIDIA CUDA 8.0 samples by
 *
 *   Eyal Rozenberg
 *
 * The derivation is specifically permitted in the nVIDIA CUDA Samples EULA
 * and the deriver is the owner of this code according to the EULA.
 *
 * Use this reasonably. If you want to discuss licensing formalities, please
 * contact the author.
 */

#include "../../common.hpp"

#include <cuda/api.hpp>

#include <iostream>
#include <memory>
#include <algorithm>

__global__ void vectorAdd(const float *A, const float *B, float *C, int numElements)
{
	int i = blockDim.x * blockIdx.x + threadIdx.x;
	if (i < numElements) { C[i] = A[i] + B[i]; }
}

int main()
{
	if (cuda::device::count() == 0) {
		std::cerr << "No CUDA devices on this system" << "\n";
		exit(EXIT_FAILURE);
	}

	int numElements = 50000;
	std::cout << "[Vector addition of " << numElements << " elements]\n";

	// If we could rely on C++14, we would use std::make_unique
	auto h_A = std::unique_ptr<float[]>(new float[numElements]);
	auto h_B = std::unique_ptr<float[]>(new float[numElements]);
	auto h_C = std::unique_ptr<float[]>(new float[numElements]);

	auto generator = []() { return rand() / (float) RAND_MAX; };
	std::generate(h_A.get(), h_A.get() + numElements, generator);
	std::generate(h_B.get(), h_B.get() + numElements, generator);

	auto device = cuda::device::current::get();

	auto d_A = cuda::memory::make_unique_region(device, numElements * sizeof(float));
	auto d_B = cuda::memory::make_unique_region(device, numElements * sizeof(float));
	auto d_C = cuda::memory::make_unique_region(device, numElements * sizeof(float));
	auto sp_A = d_A.as_span<float>();
	auto sp_B = d_B.as_span<float>();
	auto sp_C = d_C.as_span<float>();

	cuda::memory::copy(sp_A, h_A.get());
	cuda::memory::copy(sp_B, h_B.get());

	auto launch_config = cuda::launch_config_builder()
		.overall_size(numElements)
		.block_size(256)
		.build();

	std::cout
		<< "CUDA kernel launch with " << launch_config.dimensions.grid.x
		<< " blocks of " << launch_config.dimensions.block.x << " threads each\n";

	cuda::launch(
		vectorAdd, launch_config,
		sp_A.data(), sp_B.data(), sp_C.data(), numElements
	);

	cuda::memory::copy(h_C.get(), sp_C);

	// Verify that the result vector is correct
	for (int i = 0; i < numElements; ++i) {
		if (fabs(h_A.get()[i] + h_B.get()[i] - h_C.get()[i]) > 1e-5) {
			std::cerr << "Result verification failed at element " << i << "\n";
			exit(EXIT_FAILURE);
		}
	}

	std::cout << "Test PASSED\n";
	std::cout << "SUCCESS\n";
}
@@ -0,0 +1,122 @@
/**
 * @file
 *
 * @brief Implementations of @ref `make_unique_region()` functions
 */
#pragma once
#ifndef MULTI_WRAPPER_IMPLS_UNIQUE_REGION_HPP_
#define MULTI_WRAPPER_IMPLS_UNIQUE_REGION_HPP_

#include "../unique_region.hpp"
#include "../types.hpp"

namespace cuda {

namespace memory {

namespace device {

inline unique_region make_unique_region(const context_t& context, cuda::size_t num_elements)
{
	return detail_::make_unique_region(context.handle(), num_elements);
}

/**
 * @brief Allocate an owned, untyped memory region in device-global memory
 *
 * @param device the device in whose global memory to make the allocation
 * @param num_elements the size of the region to allocate, in bytes
 * @return a unique_region owning the newly-allocated memory
 */
inline unique_region make_unique_region(const device_t& device, size_t num_elements)
{
	auto pc = device.primary_context();
	return make_unique_region(pc, num_elements);
}

/**
 * @brief Allocate an owned, untyped memory region in the global memory
 * of the current device.
 *
 * @note The allocation will be made in the device's primary context -
 * which will be created if it has not yet been.
 *
 * @param num_elements the size of the region to allocate, in bytes
 *
 * @return a unique_region owning the newly-allocated memory
 */
inline unique_region make_unique_region(size_t num_elements)
{
	auto current_device_id = cuda::device::current::detail_::get_id();
	auto pc = cuda::device::primary_context::detail_::leaky_get(current_device_id);
	return make_unique_region(pc, num_elements);
}

} // namespace device

namespace host {

inline unique_region make_unique_region(
	const context_t& context,
	size_t num_bytes,
	allocation_options options)
{
	CAW_SET_SCOPE_CONTEXT(context.handle());
	return unique_region{ allocate(num_bytes, options) };
}

inline unique_region make_unique_region(const device_t& device, size_t num_bytes)
{
	auto pc = device.primary_context();
	return make_unique_region(pc, num_bytes);
}

inline unique_region make_unique_region(size_t num_bytes)
{
	return unique_region { allocate(num_bytes) };
}

} // namespace host

namespace managed {

inline unique_region make_unique_region(
	const context_t& context,
	size_t num_bytes,
	initial_visibility_t initial_visibility)
{
	CAW_SET_SCOPE_CONTEXT(context.handle());
	return unique_region { detail_::allocate_in_current_context(num_bytes, initial_visibility) };
}

inline unique_region make_unique_region(
	const device_t& device,
	size_t num_bytes,
	initial_visibility_t initial_visibility)
{
	auto pc = device.primary_context();
	return make_unique_region(pc, num_bytes, initial_visibility);
}

inline unique_region make_unique_region(
	size_t num_bytes,
	initial_visibility_t initial_visibility)
{
	auto current_device_id = cuda::device::current::detail_::get_id();
	auto pc = cuda::device::primary_context::detail_::leaky_get(current_device_id);
	return make_unique_region(pc, num_bytes, initial_visibility);
}

} // namespace managed

} // namespace memory

} // namespace cuda

#endif // MULTI_WRAPPER_IMPLS_UNIQUE_REGION_HPP_
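For completeness, a hedged sketch of calling the three overload families defined above. The `initial_visibility_t::to_all_devices` enumerator, its namespace qualification, and the pinned nature of the host-side allocation are assumptions about the surrounding library rather than facts visible in this diff:

    #include <cuda/api.hpp>

    int main()
    {
        auto device = cuda::device::current::get();

        // Untyped, owned device-global memory, sized in bytes
        auto dev_buf = cuda::memory::device::make_unique_region(device, 4096);

        // Owned host-side memory, presumably pinned/page-locked as with the
        // library's host::allocate
        auto host_buf = cuda::memory::host::make_unique_region(4096);

        // Owned managed (unified) memory; the visibility enumerator is assumed here
        auto mgd_buf = cuda::memory::managed::make_unique_region(
            4096, cuda::memory::managed::initial_visibility_t::to_all_devices);

        // Each region is freed when its unique_region goes out of scope; like the
        // example above, any of them can be viewed as a typed span
        auto as_floats = dev_buf.as_span<float>();
        (void) as_floats; (void) host_buf; (void) mgd_buf;
    }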