diff --git a/Cargo.toml b/Cargo.toml index 4fc26b5..2d5fb2e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ license = "MIT OR Apache-2.0" [dependencies] collenchyma = { version = "0.0.7", default-features = false } -cudnn = { version = "1.1.0", optional = true } +cudnn = { version = "1.2.0", optional = true } libc = "0.2" lazy_static = "0.1" diff --git a/README.md b/README.md index 7e8cd59..277e12e 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ More information can be found in the [Documentation][docs-ops]. | Convolution | cudNN v3 | - | - | | | | | | | Softmax | cudNN v3 | - | Rust | +| LogSoftmax | cudNN v3 | - | Rust | | | | | | | Pooling Max | cudNN v3 | - | - | | Pooling Avg | cudNN v3 | - | - | diff --git a/src/frameworks/cuda/helper.rs b/src/frameworks/cuda/helper.rs index bdbe6e5..25d8d0d 100644 --- a/src/frameworks/cuda/helper.rs +++ b/src/frameworks/cuda/helper.rs @@ -442,7 +442,7 @@ macro_rules! impl_ops_softmax_for { ) { Ok(_) => Ok(()), Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation softmax Forward.")) + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN softmax Forward.")) } })) } @@ -479,7 +479,83 @@ macro_rules! impl_ops_softmax_for { ) { Ok(_) => Ok(()), Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation softmax Backward.")) + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN softmax Backward.")) + } + })) + } + } + ) +} + +#[macro_export] +macro_rules! impl_ops_log_softmax_for { + ($t:ident, $b:ty) => ( + impl ::plugin::LogSoftmax<$t> for $b { + fn log_softmax( + &self, + x: &mut ::co::tensor::SharedTensor<$t>, + result: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match result.add_device(self.device()) { _ => () } + + self.log_softmax_plain(x, result) + } + + fn log_softmax_plain( + &self, + x: &::co::tensor::SharedTensor<$t>, + result: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); + + Ok(try!(match CUDNN.log_softmax_forward( + &try!(x.cudnn_tensor_desc_softmax()), // src_desc + try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(result.cudnn_tensor_desc_softmax()), // dest_desc + try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data + scal_params + ) { + Ok(_) => Ok(()), + Err(_) => { + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN logarithmic softmax Forward.")) + } + })) + } + + fn log_softmax_grad( + &self, + x: &mut ::co::tensor::SharedTensor<$t>, + x_diff: &mut ::co::tensor::SharedTensor<$t>, + result_diff: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match result_diff.add_device(self.device()) { _ => () } + + self.log_softmax_grad_plain(x, x_diff, result_diff) + } + + fn log_softmax_grad_plain( + &self, + x: &::co::tensor::SharedTensor<$t>, + x_diff: &::co::tensor::SharedTensor<$t>, + result_diff: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); + + Ok(try!(match CUDNN.log_softmax_backward( + 
&try!(x.cudnn_tensor_desc_softmax()), // src_desc + try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(x_diff.cudnn_tensor_desc_softmax()), // src_diff_desc + try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data + &try!(result_diff.cudnn_tensor_desc_softmax()), // dest_diff_desc + try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data + scal_params + ) { + Ok(_) => Ok(()), + Err(_) => { + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN logarithmic softmax Backward.")) } })) } diff --git a/src/frameworks/cuda/mod.rs b/src/frameworks/cuda/mod.rs index ae46f8d..878fb51 100644 --- a/src/frameworks/cuda/mod.rs +++ b/src/frameworks/cuda/mod.rs @@ -350,6 +350,7 @@ impl_ops_sigmoid_for!(f32, Backend); impl_ops_relu_for!(f32, Backend); impl_ops_tanh_for!(f32, Backend); impl_ops_softmax_for!(f32, Backend); +impl_ops_log_softmax_for!(f32, Backend); impl_ops_lrn_for!(f32, Backend); impl_ops_pooling_for!(f32, Backend); @@ -367,5 +368,6 @@ impl_ops_sigmoid_for!(f64, Backend); impl_ops_relu_for!(f64, Backend); impl_ops_tanh_for!(f64, Backend); impl_ops_softmax_for!(f64, Backend); +impl_ops_log_softmax_for!(f64, Backend); impl_ops_lrn_for!(f64, Backend); impl_ops_pooling_for!(f64, Backend); diff --git a/src/frameworks/native/helper.rs b/src/frameworks/native/helper.rs index 300587d..9a41ecb 100644 --- a/src/frameworks/native/helper.rs +++ b/src/frameworks/native/helper.rs @@ -411,6 +411,85 @@ macro_rules! impl_ops_softmax_for { ); } +#[macro_export] +macro_rules! impl_ops_log_softmax_for { + ($t:ident, $b:ty) => ( + impl ::plugin::LogSoftmax<$t> for $b { + fn log_softmax( + &self, + x: &mut ::co::tensor::SharedTensor<$t>, + result: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match result.add_device(self.device()) { _ => () } + self.log_softmax_plain(x, result) + } + fn log_softmax_plain( + &self, + x: &::co::tensor::SharedTensor<$t>, + result: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + if let Some(input) = x.get(self.device()).unwrap().as_native() { + let mut max_input = ::std::$t::NEG_INFINITY; + for &input_val in input.as_slice::<$t>() { + max_input = max_input.max(input_val); + } + + let mut logsum : $t = 0 as $t; + for exp in input.as_slice::<$t>().iter().map(|t| (-(max_input - t)).exp()) { + logsum += exp; + } + logsum = max_input + logsum.ln(); + + let res = input.as_slice::<$t>().iter().map(|t| t - logsum); + + ::frameworks::native::helper::write_to_memory(result.get_mut(self.device()).unwrap(), res); + return Ok(()); + } + Err(Error::Plugin( + PluginError::Operation("Unable to execute Native softmax Forward."))) + } + fn log_softmax_grad( + &self, + x: &mut ::co::tensor::SharedTensor<$t>, + x_diff: &mut ::co::tensor::SharedTensor<$t>, + result_diff: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match x_diff.add_device(self.device()) { _ => try!(x_diff.sync(self.device())) } + match result_diff.add_device(self.device()) { _ => () } + self.log_softmax_grad_plain(x, x_diff, result_diff) + } + fn log_softmax_grad_plain( + &self, + x: &::co::tensor::SharedTensor<$t>, + x_diff: &::co::tensor::SharedTensor<$t>, + result_diff: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), 
::co::error::Error> { + if let Some(sig_data) = x.get(self.device()).unwrap().as_native() { + if let Some(sig_dx) = x_diff.get(self.device()).unwrap().as_native() { + let x_slice = sig_data.as_slice::<$t>(); + let x_diff_slice = sig_dx.as_slice::<$t>(); + let mut sum = 0 as $t; + for &grad_val in x_diff_slice.iter() { + sum += grad_val; + } + let res = x_slice.iter().zip(x_diff_slice.iter()).map(|(x_val, x_diff_val)| { + x_diff_val - x_val.exp() * sum + }); + + ::frameworks::native::helper::write_to_memory(result_diff.get_mut(self.device()).unwrap(), res); + return Ok(()); + } + } + Err(Error::Plugin( + PluginError::Operation("Unable to execute Native softmax Backward."))) + + } + } + ); +} + #[macro_export] macro_rules! impl_ops_lrn_for { ($t:ident, $b:ty) => ( diff --git a/src/frameworks/native/mod.rs b/src/frameworks/native/mod.rs index 80d34ef..9a4a53c 100644 --- a/src/frameworks/native/mod.rs +++ b/src/frameworks/native/mod.rs @@ -32,6 +32,7 @@ impl_ops_relu_for!(f32, Backend); impl_ops_tanh_for!(f32, Backend); // impl_ops_convolution_for!(f32, Backend); impl_ops_softmax_for!(f32, Backend); + impl_ops_log_softmax_for!(f32, Backend); // impl_ops_lrn_for!(f32, Backend); // impl_ops_pooling_for!(f32, Backend); @@ -49,5 +50,6 @@ impl_ops_relu_for!(f64, Backend); impl_ops_tanh_for!(f64, Backend); // impl_ops_convolution_for!(f64, Backend); impl_ops_softmax_for!(f64, Backend); + impl_ops_log_softmax_for!(f64, Backend); // impl_ops_lrn_for!(f64, Backend); // impl_ops_pooling_for!(f64, Backend); diff --git a/src/plugin.rs b/src/plugin.rs index 4ef30af..1683dcc 100644 --- a/src/plugin.rs +++ b/src/plugin.rs @@ -332,7 +332,7 @@ pub trait Convolution : NN { /// Provides the functionality for a Backend to support Softmax operations. pub trait Softmax : NN { - /// Computes a [Softmax activation][softmax] over the input Tensor `x` with complete memory management. + /// Computes a [Softmax][softmax] over the input Tensor `x` with complete memory management. /// [softmax]: https://en.wikipedia.org/wiki/Softmax_function /// /// Saves the result to `result`. @@ -349,7 +349,7 @@ pub trait Softmax : NN { /// For a memory managed version see `softmax`. fn softmax_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; - /// Computes the gradient of a [Softmax activation][softmax] over the input Tensor `x` with complete memory management. + /// Computes the gradient of a [Softmax][softmax] over the input Tensor `x` with complete memory management. /// [softmax]: https://en.wikipedia.org/wiki/Softmax_function /// /// Saves the result to `result_diff`. @@ -367,6 +367,41 @@ pub trait Softmax : NN { fn softmax_grad_plain(&self, x: &SharedTensor, x_diff: &SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; } +/// Provides the functionality for a Backend to support LogSoftmax operations. +pub trait LogSoftmax : NN { + /// Computes a logarithmic softmax over the input Tensor `x` with complete memory management. + /// + /// Saves the result to `result`. + /// + /// For a no-memory managed version see `log_softmax_plain`. + fn log_softmax(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the logarithmic softmax over the input Tensor `x` without any memory management. + /// + /// Saves the result to `result`. + /// + /// *Attention*:
+ /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
+ /// For a memory managed version see `log_softmax`. + fn log_softmax_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the gradient of a logarithmic softmax over the input Tensor `x` with complete memory management. + /// + /// Saves the result to `result_diff`. + /// + /// For a no-memory managed version see `log_softmax_grad_plain`. + fn log_softmax_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the gradient of a logarithmic softmax over the input Tensor `x` without any memory management. + /// + /// Saves the result to `result_diff`. + /// + /// *Attention*:
+ /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
+ /// For a memory managed version see `log_softmax_grad`. + fn log_softmax_grad_plain(&self, x: &SharedTensor, x_diff: &SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; +} + /// Provides the functionality for a Backend to support Local Response Normalization operations. pub trait LRN : NN { /// Creates a new (Local Response Normalization) LRNConfig, which needs to be passed to further LRN Operations. diff --git a/tests/softmax_specs.rs b/tests/softmax_specs.rs index 1e8dbcd..7692fd0 100644 --- a/tests/softmax_specs.rs +++ b/tests/softmax_specs.rs @@ -210,6 +210,77 @@ mod softmax_spec_cuda { Err(err) => { println!("{:?}", err); assert!(false) } } } + + #[test] + fn it_computes_correct_log_softmax_on_cuda_for_f32_plain() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut result) = get_memory::(&backend, &native); + + match backend.log_softmax_plain(&mut x, &mut result) { + Ok(_) => { + result.sync(native.device()).unwrap(); + if let Some(mem) = result.get(native.device()).unwrap().as_native() { + assert_eq!(&[-1.3862944f32, -1.3862944f32, -1.3862944f32, -1.3862944f32], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_log_softmax_on_cuda_for_f64_plain() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut result) = get_memory::(&backend, &native); + + match backend.log_softmax_plain(&mut x, &mut result) { + Ok(_) => { + result.sync(native.device()).unwrap(); + if let Some(mem) = result.get(native.device()).unwrap().as_native() { + assert_eq!(&[-1.3862943611198908f64, + -1.3862943611198908f64, + -1.3862943611198908f64, + -1.3862943611198908f64], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_log_softmax_grad_on_cuda_for_f32_plain() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut x_diff, mut result_diff) = get_grad_memory::(&backend, &native); + + match backend.log_softmax_grad_plain(&mut x, &mut x_diff, &mut result_diff) { + Ok(_) => { + result_diff.sync(native.device()).unwrap(); + if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() { + assert_eq!(&[-9.873127f32, -9.873127f32, -27.556225f32], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_log_softmax_grad_on_cuda_for_f64_plain() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut x_diff, mut result_diff) = get_grad_memory::(&backend, &native); + + match backend.log_softmax_grad_plain(&mut x, &mut x_diff, &mut result_diff) { + Ok(_) => { + result_diff.sync(native.device()).unwrap(); + if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() { + assert_eq!(&[-9.87312731383618f64, -9.87312731383618f64, -27.5562243957226f64], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } } #[cfg(test)] @@ -269,7 +340,6 @@ mod softmax_spec_native { (x, x_diff, result_diff) } - #[test] fn it_computes_correct_softmax_on_native_for_f32() { let backend = get_native_backend(); @@ -389,4 +459,68 @@ mod softmax_spec_native { Err(err) => { println!("{:?}", err); assert!(false) } } } + + #[test] + fn it_computes_correct_log_softmax_on_native_for_f32_plain() { + let backend = get_native_backend(); + let (mut x, mut result) = get_memory::(&backend); + + 
match backend.log_softmax_plain(&mut x, &mut result) { + Ok(_) => { + if let Some(mem) = result.get(backend.device()).unwrap().as_native() { + assert_eq!(&[-1.3862944f32, -1.3862944f32, -1.3862944f32, -1.3862944f32], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_log_softmax_on_native_for_f64_plain() { + let backend = get_native_backend(); + let (mut x, mut result) = get_memory::(&backend); + + match backend.log_softmax_plain(&mut x, &mut result) { + Ok(_) => { + if let Some(mem) = result.get(backend.device()).unwrap().as_native() { + assert_eq!(&[-1.3862943611198908f64, + -1.3862943611198908f64, + -1.3862943611198908f64, + -1.3862943611198908f64], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_log_softmax_grad_on_native_for_f32_plain() { + let backend = get_native_backend(); + let (mut x, mut x_diff, mut result_diff) = get_grad_memory::(&backend); + + match backend.log_softmax_grad_plain(&mut x, &mut x_diff, &mut result_diff) { + Ok(_) => { + if let Some(mem) = result_diff.get(backend.device()).unwrap().as_native() { + assert_eq!(&[-9.873127f32, -9.873127f32, -27.556225f32], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_log_softmax_grad_on_native_for_f64_plain() { + let backend = get_native_backend(); + let (mut x, mut x_diff, mut result_diff) = get_grad_memory::(&backend); + + match backend.log_softmax_grad_plain(&mut x, &mut x_diff, &mut result_diff) { + Ok(_) => { + if let Some(mem) = result_diff.get(backend.device()).unwrap().as_native() { + assert_eq!(&[-9.87312731383618f64, -9.87312731383618f64, -27.5562243957226f64], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + }
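
For reference, the arithmetic both new backends implement: the forward pass computes y_i = x_i - (m + ln(sum_j exp(x_j - m))) with m = max_j x_j (the max shift keeps the exponentials from overflowing), and the backward pass computes dx_i = dy_i - exp(y_i) * sum_j dy_j, where y is the forward output (cuDNN's softmax backward likewise takes the forward output as its source tensor). The snippet below is a minimal standalone sketch of that arithmetic in plain Rust, mirroring the native kernels above; the free functions and the sample gradient values are illustrative only and are not part of the plugin API.

// Standalone sketch (illustrative names, not the plugin API) of the log-softmax
// arithmetic used by the native kernels above.

/// Numerically stable log-softmax: y_i = x_i - (m + ln(sum_j exp(x_j - m))), m = max_j x_j.
fn log_softmax(x: &[f32]) -> Vec<f32> {
    let m = x.iter().cloned().fold(::std::f32::NEG_INFINITY, f32::max);
    let logsum = m + x.iter().map(|&v| (v - m).exp()).sum::<f32>().ln();
    x.iter().map(|&v| v - logsum).collect()
}

/// Gradient w.r.t. the input: dx_i = dy_i - exp(y_i) * sum_j dy_j,
/// where `y` is the forward output and `dy` the incoming gradient.
fn log_softmax_grad(y: &[f32], dy: &[f32]) -> Vec<f32> {
    let sum: f32 = dy.iter().sum();
    y.iter().zip(dy).map(|(&yi, &dyi)| dyi - yi.exp() * sum).collect()
}

fn main() {
    // Four equal inputs: every output entry is ln(1/4) ~= -1.3862944, the value
    // the f32 forward tests above assert.
    let y = log_softmax(&[1.0, 1.0, 1.0, 1.0]);
    println!("{:?}", y);
    // Gradient for an arbitrary (made-up) incoming gradient.
    println!("{:?}", log_softmax_grad(&y, &[0.5, 1.0, 1.5, 2.0]));
}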