Skip to content

Commit

Permalink
feat/convolution: change convolution functions to require workspace
Browse files Browse the repository at this point in the history
BREAKING CHANGE: All convolution functions now require
a SharedTensor<u8> workspace to be passed.
This allows for reuse of the workspace between different
convolution operations and a global shared workspace.

REFERENCE #27
  • Loading branch information
hobofan committed Mar 2, 2016
1 parent 6d0851f commit f9d4013
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 65 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ license = "MIT OR Apache-2.0"

[dependencies]
collenchyma = { version = "0.0.8", default-features = false }
cudnn = { version = "1.2.1", optional = true }
cudnn = { version = "1.3.0", optional = true }
libc = "0.2"
lazy_static = "0.1"
log = "0.3.2"

clippy = { version = "0.0.27", optional = true }

Expand Down
18 changes: 15 additions & 3 deletions src/frameworks/cuda/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -510,25 +510,29 @@ macro_rules! impl_ops_convolution_for {
filter: &mut ::co::tensor::SharedTensor<$t>,
x: &mut ::co::tensor::SharedTensor<$t>,
result: &mut ::co::tensor::SharedTensor<$t>,
workspace: &mut ::co::tensor::SharedTensor<u8>,
config: &Self::CC //::frameworks::cuda::CC
) -> Result<(), ::co::error::Error> {
match x.add_device(self.device()) { _ => try!(x.sync(self.device())) }
match result.add_device(self.device()) { _ => () }
match workspace.add_device(self.device()) { _ => () }

self.convolution_plain(filter, x, result, config)
self.convolution_plain(filter, x, result, workspace, config)
}

fn convolution_plain(
&self,
filter: &::co::tensor::SharedTensor<$t>,
x: &::co::tensor::SharedTensor<$t>,
result: &mut ::co::tensor::SharedTensor<$t>,
workspace: &mut ::co::tensor::SharedTensor<u8>,
config: &Self::CC
) -> Result<(), ::co::error::Error> {
let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default();

Ok(try!(match CUDNN.convolution_forward(
config,
try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(workspace, self.device()) }),
try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(filter, self.device()) }),
&try!(x.cudnn_tensor_desc()), // src_desc
try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data
Expand All @@ -549,13 +553,15 @@ macro_rules! impl_ops_convolution_for {
src_data: &mut ::co::tensor::SharedTensor<$t>,
dest_diff: &mut ::co::tensor::SharedTensor<$t>,
filter_diff: &mut ::co::tensor::SharedTensor<$t>,
workspace: &mut ::co::tensor::SharedTensor<u8>,
config: &Self::CC
) -> Result<(), ::co::error::Error> {
match src_data.add_device(self.device()) { _ => try!(src_data.sync(self.device())) }
match dest_diff.add_device(self.device()) { _ => try!(dest_diff.sync(self.device())) }
match filter_diff.add_device(self.device()) { _ => try!(filter_diff.sync(self.device())) }
match workspace.add_device(self.device()) { _ => () }

self.convolution_grad_filter_plain(src_data, dest_diff, filter_diff, config)
self.convolution_grad_filter_plain(src_data, dest_diff, filter_diff, workspace, config)
}

#[allow(unused_variables)]
Expand All @@ -564,12 +570,14 @@ macro_rules! impl_ops_convolution_for {
src_data: &::co::tensor::SharedTensor<$t>,
dest_diff: &::co::tensor::SharedTensor<$t>,
filter_diff: &mut ::co::tensor::SharedTensor<$t>,
workspace: &mut ::co::tensor::SharedTensor<u8>,
config: &Self::CC
) -> Result<(), ::co::error::Error> {
let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default();

Ok(try!(match CUDNN.convolution_backward_filter(
config,
try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(workspace, self.device()) }),
&try!(src_data.cudnn_tensor_desc()),
try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(src_data, self.device()) }),
&try!(dest_diff.cudnn_tensor_desc()),
Expand All @@ -590,13 +598,15 @@ macro_rules! impl_ops_convolution_for {
filter: &mut ::co::tensor::SharedTensor<$t>,
x_diff: &mut ::co::tensor::SharedTensor<$t>,
result_diff: &mut ::co::tensor::SharedTensor<$t>,
workspace: &mut ::co::tensor::SharedTensor<u8>,
config: &Self::CC
) -> Result<(), ::co::error::Error> {
match filter.add_device(self.device()) { _ => try!(filter.sync(self.device())) }
match x_diff.add_device(self.device()) { _ => try!(x_diff.sync(self.device())) }
match result_diff.add_device(self.device()) { _ => try!(result_diff.sync(self.device())) }
match workspace.add_device(self.device()) { _ => () }

self.convolution_grad_data_plain(filter, x_diff, result_diff, config)
self.convolution_grad_data_plain(filter, x_diff, result_diff, workspace, config)
}

#[allow(unused_variables)]
Expand All @@ -605,12 +615,14 @@ macro_rules! impl_ops_convolution_for {
filter: &::co::tensor::SharedTensor<$t>,
x_diff: &::co::tensor::SharedTensor<$t>,
result_diff: &mut ::co::tensor::SharedTensor<$t>,
workspace: &mut ::co::tensor::SharedTensor<u8>,
config: &Self::CC
) -> Result<(), ::co::error::Error> {
let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default();

Ok(try!(match CUDNN.convolution_backward_data(
config,
try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(workspace, self.device()) }),
try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(filter, self.device()) }),
&try!(x_diff.cudnn_tensor_desc()),
try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }),
Expand Down
60 changes: 21 additions & 39 deletions src/frameworks/cuda/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,12 @@ impl ConvBackwardDataAlgo {

macro_rules! impl_convolution_for_cuda_backend {
($t:ty, $cutype:path) => (
// Bridges the cuDNN-provided config to the plugin's ConvolutionConfig trait
// so callers can size a shared workspace from it.
impl ConvolutionConfig<$t> for ::cudnn::utils::ConvolutionConfig {
    /// Workspace size in bytes to allocate for this convolution config.
    ///
    /// NOTE(review): `largest_workspace_size()` presumably returns the max
    /// over the forward / backward-filter / backward-data algorithm
    /// requirements — confirm against the `cudnn` crate's implementation.
    fn workspace_size(&self) -> usize {
        *self.largest_workspace_size()
    }
}

impl Convolution<$t> for Backend<Cuda> {
fn new_convolution_config(
&self,
Expand All @@ -292,53 +298,29 @@ macro_rules! impl_convolution_for_cuda_backend {
let conv_desc = ::cudnn::ConvolutionDescriptor::new(zero_padding, stride, $cutype).unwrap();

let useable_algo_fwd = try!(algo_fwd.find_cudnn_algo(&filter_desc, &conv_desc, &src_desc, &dest_desc));
let (workspace_fwd, workspace_size_fwd) = match try!(useable_algo_fwd.needs_cudnn_workspace()) {
false => (::co::frameworks::cuda::Memory::from_c(0), 0),
true => {
let workspace_size_fwd = API::get_convolution_forward_workspace_size(*CUDNN.id_c(), useable_algo_fwd.as_cudnn().unwrap(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
let workspace_forward = ::co::frameworks::cuda::Memory::new(workspace_size_fwd).unwrap();
(workspace_forward, workspace_size_fwd)
}
};

let useable_algo_bwd_filter = try!(algo_bwd_filter.find_cudnn_algo(&filter_desc, &conv_desc, &src_desc, &dest_desc));
let (workspace_bwd_filter, workspace_size_bwd_filter) = match try!(useable_algo_bwd_filter.needs_cudnn_workspace()) {
false => (::co::frameworks::cuda::Memory::from_c(0), 0),
true => {
let workspace_size_bwd_filter = API::get_convolution_backward_filter_workspace_size(*CUDNN.id_c(), useable_algo_bwd_filter.as_cudnn().unwrap(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
let workspace_backward = ::co::frameworks::cuda::Memory::new(workspace_size_bwd_filter).unwrap();
(workspace_backward, workspace_size_bwd_filter)
}
};

let useable_algo_bwd_data = try!(algo_bwd_data.find_cudnn_algo(&filter_desc, &conv_desc, &src_desc, &dest_desc));
let (workspace_bwd_data, workspace_size_bwd_data) = match try!(useable_algo_bwd_data.needs_cudnn_workspace()) {
false => (::co::frameworks::cuda::Memory::from_c(0), 0),
true => {
let workspace_size_bwd_data = API::get_convolution_backward_data_workspace_size(*CUDNN.id_c(), useable_algo_bwd_data.as_cudnn().unwrap(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
let workspace_backward = ::co::frameworks::cuda::Memory::new(workspace_size_bwd_data).unwrap();
(workspace_backward, workspace_size_bwd_data)
}

let workspace_size_fwd = match try!(useable_algo_fwd.needs_cudnn_workspace()) {
false => 0,
true => API::get_convolution_forward_workspace_size(*CUDNN.id_c(), useable_algo_fwd.as_cudnn().unwrap(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap(),
};

// share one workspace to reduce memory
let workspace: ::co::frameworks::cuda::Memory;
if workspace_size_bwd_data >= workspace_size_bwd_filter && workspace_size_bwd_data >= workspace_size_fwd {
workspace = workspace_bwd_data;
} else if workspace_size_bwd_filter >= workspace_size_bwd_data && workspace_size_bwd_filter >= workspace_size_fwd {
workspace = workspace_bwd_filter;
} else {
workspace = workspace_fwd;
}
let workspace_size_bwd_filter = match try!(useable_algo_bwd_filter.needs_cudnn_workspace()) {
false => 0,
true => API::get_convolution_backward_filter_workspace_size(*CUDNN.id_c(), useable_algo_bwd_filter.as_cudnn().unwrap(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap(),
};

let workspace_bwd_filter = ::co::frameworks::cuda::Memory::from_c(*workspace.id_c());
let workspace_fwd = ::co::frameworks::cuda::Memory::from_c(*workspace.id_c());
let workspace_size_bwd_data = match try!(useable_algo_bwd_data.needs_cudnn_workspace()) {
false => 0,
true => API::get_convolution_backward_data_workspace_size(*CUDNN.id_c(), useable_algo_bwd_data.as_cudnn().unwrap(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap(),
};

Ok(
::cudnn::utils::ConvolutionConfig::new(
useable_algo_fwd.as_cudnn().unwrap(), workspace_fwd, workspace_size_fwd,
useable_algo_bwd_filter.as_cudnn().unwrap(), workspace_bwd_filter, workspace_size_bwd_filter,
useable_algo_bwd_data.as_cudnn().unwrap(), workspace, workspace_size_bwd_data,
useable_algo_fwd.as_cudnn().unwrap(), workspace_size_fwd,
useable_algo_bwd_filter.as_cudnn().unwrap(), workspace_size_bwd_filter,
useable_algo_bwd_data.as_cudnn().unwrap(), workspace_size_bwd_data,
conv_desc, filter_desc
)
)
Expand Down
1 change: 1 addition & 0 deletions src/frameworks/native/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ pub fn tanh_grad<T: Float>(x: &T, dx: &T) -> T {
// For each listed scalar type `$t` (e.g. f32, f64), marks the native
// backend's ConvolutionConfig as a valid operation config by implementing
// both the generic NNOperationConfig marker and the ConvolutionConfig
// trait (the latter keeps its default `workspace_size()` of 0, since the
// native implementation needs no scratch workspace).
macro_rules! impl_oconf_for_cc(($($t: ident), +) => (
    $(
        impl<'a> NNOperationConfig<$t> for ::frameworks::native::helper::ConvolutionConfig { }
        impl<'a> ConvolutionConfig<$t> for ::frameworks::native::helper::ConvolutionConfig { }
    )+
));

Expand Down
9 changes: 4 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
//! extern crate collenchyma_nn as nn;
//! # #[cfg(feature = "cuda")]
//! # mod cuda {
//! use co::backend::{Backend, BackendConfig};
//! use co::backend::{Backend, BackendConfig, IBackend};
//! use co::framework::IFramework;
//! use co::frameworks::{Cuda, Native};
//! use co::memory::MemoryType;
Expand All @@ -56,10 +56,7 @@
//! pub fn main() {
//! // Initialize a CUDA Backend.
//! // Usually you would not use CUDA but let Collenchyma pick what is available on the machine.
//! let framework = Cuda::new();
//! let hardwares = framework.hardwares();
//! let backend_config = BackendConfig::new(framework, hardwares);
//! let backend = Backend::new(backend_config).unwrap();
//! let backend = Backend::<Cuda>::default().unwrap();
//! // Initialize two SharedTensors.
//! let mut x = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
//! let mut result = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
Expand Down Expand Up @@ -132,6 +129,8 @@ extern crate cudnn;
extern crate libc;
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate log;

pub use plugin::*;

Expand Down
25 changes: 18 additions & 7 deletions src/plugin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,21 @@ impl ConvBackwardDataAlgo {
/// Needs to be implemented for Operation specific configurations.
///
/// Marker trait: a type implementing this can serve as a backend's
/// configuration object for a neural-network operation over element
/// type `F`. It carries no required methods of its own.
pub trait NNOperationConfig<F> {}

/// Provides Convolution Config functionality.
///
/// Needs to be implemented for Operation specific configurations.
pub trait ConvolutionConfig<F> {
    /// Returns the largest workspace size in bytes needed
    /// for any of the convolution operations.
    ///
    /// Defaults to `0` for implementations that do not need a
    /// scratch workspace (e.g. a plain native implementation);
    /// backends that do (e.g. cuDNN) override this so callers can
    /// allocate one shared `SharedTensor<u8>` workspace large enough
    /// for every convolution pass.
    fn workspace_size(&self) -> usize {
        0
    }
}

/// Provides the functionality for a backend to support Neural Network related operations.
pub trait NN<F> {
/// The Convolution Operation Config representation for this Plugin.
type CC: NNOperationConfig<F>;
type CC: NNOperationConfig<F> + ConvolutionConfig<F>;
/// The LRN Operation Config representation for this Plugin.
type CLRN: NNOperationConfig<F>;
/// The Pooling Operation Config representation for this Plugin.
Expand Down Expand Up @@ -368,7 +379,7 @@ pub trait Convolution<F> : NN<F> {
/// Saves the result to `result`.
///
/// For a no-memory managed version see `convolution_plain`.
fn convolution(&self, filter: &mut SharedTensor<F>, x: &mut SharedTensor<F>, result: &mut SharedTensor<F>, config: &Self::CC) -> Result<(), ::co::error::Error>;
fn convolution(&self, filter: &mut SharedTensor<F>, x: &mut SharedTensor<F>, result: &mut SharedTensor<F>, workspace: &mut SharedTensor<u8>, config: &Self::CC) -> Result<(), ::co::error::Error>;

/// Computes the convolution over the input Tensor `x` without any memory management.
///
Expand All @@ -377,15 +388,15 @@ pub trait Convolution<F> : NN<F> {
/// *Attention*:<br/>
/// For a correct computation result, you need to manage the memory allocation and synchronization yourself.<br/>
/// For a memory managed version see `convolution`.
fn convolution_plain(&self, filter: &SharedTensor<F>, x: &SharedTensor<F>, result: &mut SharedTensor<F>, config: &Self::CC) -> Result<(), ::co::error::Error>;
fn convolution_plain(&self, filter: &SharedTensor<F>, x: &SharedTensor<F>, result: &mut SharedTensor<F>, workspace: &mut SharedTensor<u8>, config: &Self::CC) -> Result<(), ::co::error::Error>;

/// Computes the gradient of a [CNN convolution][convolution] with respect to the filter and complete memory management.
/// [convolution]: https://en.wikipedia.org/wiki/Convolutional_neural_network
///
/// Saves the result to `filter_diff`.
///
/// For a no-memory managed version see `convolution_grad_filter_plain`.
fn convolution_grad_filter(&self, src_data: &mut SharedTensor<F>, dest_diff: &mut SharedTensor<F>, filter_diff: &mut SharedTensor<F>, config: &Self::CC) -> Result<(), ::co::error::Error>;
fn convolution_grad_filter(&self, src_data: &mut SharedTensor<F>, dest_diff: &mut SharedTensor<F>, filter_diff: &mut SharedTensor<F>, workspace: &mut SharedTensor<u8>, config: &Self::CC) -> Result<(), ::co::error::Error>;

/// Computes the gradient of a convolution with respect to the filter and without any memory management.
///
Expand All @@ -394,15 +405,15 @@ pub trait Convolution<F> : NN<F> {
/// *Attention*:<br/>
/// For a correct computation result, you need to manage the memory allocation and synchronization yourself.<br/>
/// For a memory managed version see `convolution_grad_filter`.
fn convolution_grad_filter_plain(&self, src_data: &SharedTensor<F>, dest_diff: &SharedTensor<F>, filter_diff: &mut SharedTensor<F>, config: &Self::CC) -> Result<(), ::co::error::Error>;
fn convolution_grad_filter_plain(&self, src_data: &SharedTensor<F>, dest_diff: &SharedTensor<F>, filter_diff: &mut SharedTensor<F>, workspace: &mut SharedTensor<u8>, config: &Self::CC) -> Result<(), ::co::error::Error>;

/// Computes the gradient of a [CNN convolution][convolution] over the input Tensor `x` with respect to the data and complete memory management.
/// [convolution]: https://en.wikipedia.org/wiki/Convolutional_neural_network
///
/// Saves the result to `result_diff`.
///
/// For a no-memory managed version see `convolution_grad_data_plain`.
fn convolution_grad_data(&self, filter: &mut SharedTensor<F>, x_diff: &mut SharedTensor<F>, result_diff: &mut SharedTensor<F>, config: &Self::CC) -> Result<(), ::co::error::Error>;
fn convolution_grad_data(&self, filter: &mut SharedTensor<F>, x_diff: &mut SharedTensor<F>, result_diff: &mut SharedTensor<F>, workspace: &mut SharedTensor<u8>, config: &Self::CC) -> Result<(), ::co::error::Error>;

/// Computes the gradient of a convolution over the input Tensor `x` with respect to the data and without any memory management.
///
Expand All @@ -411,7 +422,7 @@ pub trait Convolution<F> : NN<F> {
/// *Attention*:<br/>
/// For a correct computation result, you need to manage the memory allocation and synchronization yourself.<br/>
/// For a memory managed version see `convolution_grad_data`.
fn convolution_grad_data_plain(&self, filter: &SharedTensor<F>, x_diff: &SharedTensor<F>, result_diff: &mut SharedTensor<F>, config: &Self::CC) -> Result<(), ::co::error::Error>;
fn convolution_grad_data_plain(&self, filter: &SharedTensor<F>, x_diff: &SharedTensor<F>, result_diff: &mut SharedTensor<F>, workspace: &mut SharedTensor<u8>, config: &Self::CC) -> Result<(), ::co::error::Error>;
}

// /// Computes the backward Convolution function w.r.t the bias.
Expand Down
Loading

0 comments on commit f9d4013

Please sign in to comment.