Skip to content

Commit

Permalink
feat/cuda: add memory allocation
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelHirn authored and hobofan committed Dec 3, 2015
1 parent b0a40d3 commit 35f7f47
Show file tree
Hide file tree
Showing 24 changed files with 367 additions and 245 deletions.
30 changes: 15 additions & 15 deletions benches/rblas_overhead.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ fn bench_1000_dot_100_collenchyma(b: &mut Bencher) {
let slice_b = rng.gen_iter::<f32>().take(100).collect::<Vec<f32>>();

let backend = backend();
let shared_a = &mut SharedMemory::<f32>::new(backend.device(), 100);
let shared_b = &mut SharedMemory::<f32>::new(backend.device(), 100);
let shared_res = &mut SharedMemory::<f32>::new(backend.device(), 100);
let shared_a = &mut SharedMemory::<f32>::new(backend.device(), 100).unwrap();
let shared_b = &mut SharedMemory::<f32>::new(backend.device(), 100).unwrap();
let shared_res = &mut SharedMemory::<f32>::new(backend.device(), 100).unwrap();
shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a);
shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b);
let _ = backend.dot(shared_a, shared_b, shared_res);
Expand Down Expand Up @@ -83,9 +83,9 @@ fn bench_100_dot_1000_collenchyma(b: &mut Bencher) {
let slice_b = rng.gen_iter::<f32>().take(1000).collect::<Vec<f32>>();

let backend = backend();
let shared_a = &mut SharedMemory::<f32>::new(backend.device(), 1000);
let shared_b = &mut SharedMemory::<f32>::new(backend.device(), 1000);
let shared_res = &mut SharedMemory::<f32>::new(backend.device(), 1);
let shared_a = &mut SharedMemory::<f32>::new(backend.device(), 1000).unwrap();
let shared_b = &mut SharedMemory::<f32>::new(backend.device(), 1000).unwrap();
let shared_res = &mut SharedMemory::<f32>::new(backend.device(), 1).unwrap();
shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a);
shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b);
let _ = backend.dot(shared_a, shared_b, shared_res);
Expand All @@ -108,9 +108,9 @@ fn bench_50_dot_2000_collenchyma(b: &mut Bencher) {
let slice_b = rng.gen_iter::<f32>().take(2000).collect::<Vec<f32>>();

let backend = backend();
let shared_a = &mut SharedMemory::<f32>::new(backend.device(), 2000);
let shared_b = &mut SharedMemory::<f32>::new(backend.device(), 2000);
let shared_res = &mut SharedMemory::<f32>::new(backend.device(), 1);
let shared_a = &mut SharedMemory::<f32>::new(backend.device(), 2000).unwrap();
let shared_b = &mut SharedMemory::<f32>::new(backend.device(), 2000).unwrap();
let shared_res = &mut SharedMemory::<f32>::new(backend.device(), 1).unwrap();
shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a);
shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b);
let _ = backend.dot(shared_a, shared_b, shared_res);
Expand Down Expand Up @@ -147,9 +147,9 @@ fn bench_10_dot_10000_collenchyma(b: &mut Bencher) {
let slice_b = rng.gen_iter::<f32>().take(10000).collect::<Vec<f32>>();

let backend = backend();
let shared_a = &mut SharedMemory::<f32>::new(backend.device(), 10000);
let shared_b = &mut SharedMemory::<f32>::new(backend.device(), 10000);
let shared_res = &mut SharedMemory::<f32>::new(backend.device(), 1);
let shared_a = &mut SharedMemory::<f32>::new(backend.device(), 10000).unwrap();
let shared_b = &mut SharedMemory::<f32>::new(backend.device(), 10000).unwrap();
let shared_res = &mut SharedMemory::<f32>::new(backend.device(), 1).unwrap();
shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a);
shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b);
let _ = backend.dot(shared_a, shared_b, shared_res);
Expand Down Expand Up @@ -186,9 +186,9 @@ fn bench_5_dot_20000_collenchyma(b: &mut Bencher) {
let slice_b = rng.gen_iter::<f32>().take(20000).collect::<Vec<f32>>();

let backend = backend();
let shared_a = &mut SharedMemory::<f32>::new(backend.device(), 20000);
let shared_b = &mut SharedMemory::<f32>::new(backend.device(), 20000);
let shared_res = &mut SharedMemory::<f32>::new(backend.device(), 1);
let shared_a = &mut SharedMemory::<f32>::new(backend.device(), 20000).unwrap();
let shared_b = &mut SharedMemory::<f32>::new(backend.device(), 20000).unwrap();
let shared_res = &mut SharedMemory::<f32>::new(backend.device(), 1).unwrap();
shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_a);
shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_b);
let _ = backend.dot(shared_a, shared_b, shared_res);
Expand Down
1 change: 1 addition & 0 deletions src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
//! use co::framework::*;
//! use co::backend::{Backend, BackendConfig};
//! use co::frameworks::OpenCL;
//! #[allow(unused_variables)]
//! fn main() {
//! // Initialize a new Framework.
//! let framework = OpenCL::new();
Expand Down
73 changes: 70 additions & 3 deletions src/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
use hardware::IHardware;
use memory::{IMemory, MemoryType};
use frameworks::native::device::Cpu;
use frameworks::opencl::context::Context as OpenCLContext;
use frameworks::cuda::context::Context as CudaContext;
use frameworks::opencl::Context as OpenCLContext;
use frameworks::cuda::Context as CudaContext;
use frameworks::native::Error as NativeError;
use frameworks::opencl::Error as OpenCLError;
use frameworks::cuda::Error as CudaError;
use std::{fmt, error};

/// Specifies Hardware behavior across frameworks.
pub trait IDevice {
Expand All @@ -23,7 +27,7 @@ pub trait IDevice {
/// Returns the hardwares, which define the Device.
fn hardwares(&self) -> Vec<Self::H>;
/// Allocate memory on the Device.
fn alloc_memory(&self, size: usize) -> Self::M;
fn alloc_memory(&self, size: usize) -> Result<Self::M, Error>;
/// Synchronize memory from this Device to `dest_device`.
fn sync_memory_to(&self, source: &Self::M, dest: &mut MemoryType, dest_device: &DeviceType);
}
Expand All @@ -38,3 +42,66 @@ pub enum DeviceType {
/// A Cuda Context
Cuda(CudaContext),
}

#[derive(Debug, Copy, Clone)]
/// Defines a generic set of Memory Errors.
pub enum Error {
/// Failures related to the Native framework implementation.
Native(NativeError),
/// Failures related to the OpenCL framework implementation.
OpenCL(OpenCLError),
/// Failures related to the Cuda framework implementation.
Cuda(CudaError),
}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::Native(ref err) => write!(f, "Native error: {}", err),
Error::OpenCL(ref err) => write!(f, "OpenCL error: {}", err),
Error::Cuda(ref err) => write!(f, "Cuda error: {}", err),
}
}
}

impl error::Error for Error {
fn description(&self) -> &str {
match *self {
Error::Native(ref err) => err.description(),
Error::OpenCL(ref err) => err.description(),
Error::Cuda(ref err) => err.description(),
}
}

fn cause(&self) -> Option<&error::Error> {
match *self {
Error::Native(ref err) => Some(err),
Error::OpenCL(ref err) => Some(err),
Error::Cuda(ref err) => Some(err),
}
}
}

impl From<NativeError> for Error {
fn from(err: NativeError) -> Error {
Error::Native(err)
}
}

impl From<OpenCLError> for Error {
fn from(err: OpenCLError) -> Error {
Error::OpenCL(err)
}
}

impl From<CudaError> for Error {
fn from(err: CudaError) -> Error {
Error::Cuda(err)
}
}

impl From<Error> for ::shared_memory::Error {
fn from(err: Error) -> ::shared_memory::Error {
::shared_memory::Error::MemoryAllocationError(err)
}
}
7 changes: 6 additions & 1 deletion src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ pub enum Error {
Framework(::framework::Error),
/// Failure related to the SharedMemory.
SharedMemory(::shared_memory::Error),
/// Failure realted to an Library(Operation).
/// Failure related to an Library(Operation).
Operation(::libraries::Error),
/// Failure related to a Device.
Device(::device::Error),
}

impl fmt::Display for Error {
Expand All @@ -19,6 +21,7 @@ impl fmt::Display for Error {
Error::Framework(ref err) => write!(f, "Framwork error: {}", err),
Error::SharedMemory(ref err) => write!(f, "SharedMemory error: {}", err),
Error::Operation(ref err) => write!(f, "Library/Operation error: {}", err),
Error::Device(ref err) => write!(f, "Device error: {}", err),
}
}
}
Expand All @@ -29,6 +32,7 @@ impl error::Error for Error {
Error::Framework(ref err) => err.description(),
Error::SharedMemory(ref err) => err.description(),
Error::Operation(ref err) => err.description(),
Error::Device(ref err) => err.description(),
}
}

Expand All @@ -37,6 +41,7 @@ impl error::Error for Error {
Error::Framework(ref err) => Some(err),
Error::SharedMemory(ref err) => Some(err),
Error::Operation(ref err) => Some(err),
Error::Device(ref err) => Some(err),
}
}
}
26 changes: 13 additions & 13 deletions src/frameworks/cuda/api/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ impl API {
let mut context: CUcontext = ptr::null_mut();
match cuCtxCreate_v2(&mut context, CU_CTX_SCHED_AUTO, dev) {
CUresult::CUDA_SUCCESS => Ok(context),
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized(format!("CUDA got deinitialized."))),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized(format!("CUDA is not initialized."))),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext(format!("No valid context available."))),
CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue(format!("Invalid value for `device` provided."))),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue(format!("Invalid value provided."))),
CUresult::CUDA_ERROR_OUT_OF_MEMORY => Err(Error::OutOfMemory(format!("Device is out of memory."))),
CUresult::CUDA_ERROR_UNKNOWN => Err(Error::Unknown(format!("An unknown Error occured. Check the CUDA DRIVER API manual for more details."))),
_ => Err(Error::Unknown(format!("Unable to create Cuda context."))),
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue("Invalid value for `device` provided.")),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
CUresult::CUDA_ERROR_OUT_OF_MEMORY => Err(Error::OutOfMemory("Device is out of memory.")),
CUresult::CUDA_ERROR_UNKNOWN => Err(Error::Unknown("An unknown Error occured. Check the CUDA DRIVER API manual for more details.")),
_ => Err(Error::Unknown("Unable to create Cuda context.")),
}
}

Expand All @@ -49,11 +49,11 @@ impl API {
) -> Result<(), Error> {
match cuCtxDestroy_v2(ctx) {
CUresult::CUDA_SUCCESS => Ok(()),
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized(format!("CUDA got deinitialized."))),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized(format!("CUDA is not initialized."))),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext(format!("No valid context available."))),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue(format!("Invalid value provided."))),
_ => Err(Error::Unknown(format!("Unable to destroy Cuda context."))),
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
_ => Err(Error::Unknown("Unable to destroy Cuda context.")),
}
}
}
56 changes: 28 additions & 28 deletions src/frameworks/cuda/api/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,11 @@ impl API {
) -> Result<(), Error> {
match cuDeviceGet(device, ordinal) {
CUresult::CUDA_SUCCESS => Ok(()),
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized(format!("CUDA got deinitialized."))),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized(format!("CUDA is not initialized."))),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext(format!("No valid context available."))),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue(format!("Invalid value provided."))),
_ => Err(Error::Unknown(format!("Unable to get Device count."))),
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
_ => Err(Error::Unknown("Unable to get Device count.")),
}
}

Expand All @@ -86,11 +86,11 @@ impl API {
) -> Result<(), Error> {
match cuDeviceGetCount(count) {
CUresult::CUDA_SUCCESS => Ok(()),
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized(format!("CUDA got deinitialized."))),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized(format!("CUDA is not initialized."))),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext(format!("No valid context available."))),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue(format!("Invalid value provided."))),
_ => Err(Error::Unknown(format!("Unable to get Device count."))),
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
_ => Err(Error::Unknown("Unable to get Device count.")),
}
}

Expand All @@ -101,12 +101,12 @@ impl API {
) -> Result<(), Error> {
match cuDeviceGetAttribute(pi, attrib, device) {
CUresult::CUDA_SUCCESS => Ok(()),
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized(format!("CUDA got deinitialized."))),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized(format!("CUDA is not initialized."))),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext(format!("No valid context available."))),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue(format!("Invalid value provided."))),
CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue(format!("Invalid value for `device` provided."))),
_ => Err(Error::Unknown(format!("Unable to get device attribute.")))
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue("Invalid value for `device` provided.")),
_ => Err(Error::Unknown("Unable to get device attribute."))
}
}

Expand All @@ -117,12 +117,12 @@ impl API {
) -> Result<(), Error> {
match cuDeviceGetName(name, len, device) {
CUresult::CUDA_SUCCESS => Ok(()),
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized(format!("CUDA got deinitialized."))),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized(format!("CUDA is not initialized."))),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext(format!("No valid context available."))),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue(format!("Invalid value provided."))),
CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue(format!("Invalid value for `device` provided."))),
_ => Err(Error::Unknown(format!("Unable to get device name.")))
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue("Invalid value for `device` provided.")),
_ => Err(Error::Unknown("Unable to get device name."))
}
}

Expand All @@ -132,12 +132,12 @@ impl API {
) -> Result<(), Error> {
match cuDeviceTotalMem_v2(bytes, device) {
CUresult::CUDA_SUCCESS => Ok(()),
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized(format!("CUDA got deinitialized."))),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized(format!("CUDA is not initialized."))),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext(format!("No valid context available."))),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue(format!("Invalid value provided."))),
CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue(format!("Invalid value for `device` provided."))),
_ => Err(Error::Unknown(format!("Unable to get total mem of device.")))
CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue("Invalid value for `device` provided.")),
_ => Err(Error::Unknown("Unable to get total mem of device."))
}
}
}
Loading

0 comments on commit 35f7f47

Please sign in to comment.