diff --git a/benches/softmax.rs b/benches/softmax.rs
new file mode 100644
index 0000000..6ddb0be
--- /dev/null
+++ b/benches/softmax.rs
@@ -0,0 +1,86 @@
+#![feature(test)]
+#![feature(clone_from_slice)]
+
+extern crate test;
+extern crate collenchyma as co;
+extern crate collenchyma_nn as co_nn;
+extern crate rand;
+
+use test::Bencher;
+use co::backend::{Backend, BackendConfig};
+use co::frameworks::Native;
+use co::framework::IFramework;
+use co::tensor::SharedTensor;
+use co_nn::*;
+
+use rand::{thread_rng, Rng};
+
+fn backend() -> Backend<Native> {
+    let framework = Native::new();
+    let hardwares = framework.hardwares();
+    let backend_config = BackendConfig::new(framework, hardwares);
+    Backend::new(backend_config).unwrap()
+}
+
+fn arguments(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>) {
+    let mut rng = thread_rng();
+    let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
+
+    let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
+    let out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
+    x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
+    (x, out)
+}
+
+fn arguments_grad(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>) {
+    let mut rng = thread_rng();
+    let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
+
+    let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
+    let mut dx = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
+    let dout = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
+    x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
+    dx.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
+    (x, dx, dout)
+}
+
+#[inline(never)]
+fn bench_profile<F: FnMut() -> ()>(
+    b: &mut Bencher,
+    mut bench_func: F,
+    times: usize
+) {
+    b.iter(|| { for _ in 0..times { bench_func(); } });
+}
+
+#[bench]
+fn bench_1000_softmax_100_native(b: &mut Bencher) {
+    let backend = backend();
+    let (mut x, mut out) = arguments(&backend, 100);
+    let mut func = || { let _ = backend.softmax_plain(&mut x, &mut out); };
+    { func(); bench_profile(b, func, 1000); }
+}
+
+#[bench]
+fn bench_10_softmax_10000_native(b: &mut Bencher) {
+    let backend = backend();
+    let (mut x, mut out) = arguments(&backend, 10000);
+    let mut func = || { let _ = backend.softmax_plain(&mut x, &mut out); };
+    { func(); bench_profile(b, func, 10); }
+}
+
+#[bench]
+fn bench_1000_softmax_grad_100_native(b: &mut Bencher) {
+    let backend = backend();
+    let (mut x, mut dx, mut dout) = arguments_grad(&backend, 100);
+    let mut func = || { let _ = backend.softmax_grad_plain(&mut x, &mut dx, &mut dout); };
+    { func(); bench_profile(b, func, 1000); }
+}
+
+#[bench]
+fn bench_10_softmax_grad_10000_native(b: &mut Bencher) {
+    let backend = backend();
+    let (mut x, mut dx, mut dout) = arguments_grad(&backend, 10000);
+    let mut func = || { let _ = backend.softmax_grad_plain(&mut x, &mut dx, &mut dout); };
+    { func(); bench_profile(b, func, 10); }
+}
diff --git a/src/frameworks/native/helper.rs b/src/frameworks/native/helper.rs
index 399a5d1..ba35f2d 100644
--- a/src/frameworks/native/helper.rs
+++ b/src/frameworks/native/helper.rs
@@ -130,8 +130,8 @@ macro_rules! impl_ops_sigmoid_for {
                 result_diff: &mut ::co::tensor::SharedTensor<$t>
             ) -> Result<(), ::co::error::Error> {
                 match x.add_device(self.device()) { _ => try!(x.sync(self.device())) }
-                match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) }
-                match result.add_device(self.device()) { _ => try!(x.sync(self.device())) }
+                match x_diff.add_device(self.device()) { _ => try!(x_diff.sync(self.device())) }
+                match result.add_device(self.device()) { _ => try!(result.sync(self.device())) }
                 match result_diff.add_device(self.device()) { _ => () }
                 self.sigmoid_grad_plain(x, x_diff, result, result_diff)
             }
@@ -193,8 +193,8 @@ macro_rules! impl_ops_relu_for {
                 result_diff: &mut ::co::tensor::SharedTensor<$t>
             ) -> Result<(), ::co::error::Error> {
                 match x.add_device(self.device()) { _ => try!(x.sync(self.device())) }
-                match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) }
-                match result.add_device(self.device()) { _ => try!(x.sync(self.device())) }
+                match x_diff.add_device(self.device()) { _ => try!(x_diff.sync(self.device())) }
+                match result.add_device(self.device()) { _ => try!(result.sync(self.device())) }
                 self.relu_grad_plain(x, x_diff, result, result_diff)
             }

@@ -256,8 +256,8 @@ macro_rules! impl_ops_tanh_for {
                 result_diff: &mut ::co::tensor::SharedTensor<$t>
             ) -> Result<(), ::co::error::Error> {
                 match x.add_device(self.device()) { _ => try!(x.sync(self.device())) }
-                match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) }
-                match result.add_device(self.device()) { _ => try!(x.sync(self.device())) }
+                match x_diff.add_device(self.device()) { _ => try!(x_diff.sync(self.device())) }
+                match result.add_device(self.device()) { _ => try!(result.sync(self.device())) }
                 self.tanh_grad_plain(x, x_diff, result, result_diff)
             }

@@ -354,16 +354,28 @@ macro_rules! impl_ops_softmax_for {
                 x: &mut ::co::tensor::SharedTensor<$t>,
                 result: &mut ::co::tensor::SharedTensor<$t>
             ) -> Result<(), ::co::error::Error> {
-                unimplemented!();
-                Ok(())
+                match x.add_device(self.device()) { _ => try!(x.sync(self.device())) }
+                match result.add_device(self.device()) { _ => () }
+                self.softmax_plain(x, result)
             }
             fn softmax_plain(
                 &self,
                 x: &::co::tensor::SharedTensor<$t>,
                 result: &mut ::co::tensor::SharedTensor<$t>
             ) -> Result<(), ::co::error::Error> {
-                unimplemented!();
-                Ok(())
+                if let Some(input) = x.get(self.device()).unwrap().as_native() {
+                    let mut exps = Vec::with_capacity(x.capacity());
+                    let mut sum : $t = 0 as $t;
+                    for exp in input.as_slice::<$t>().iter().map(|t|t.exp()) {
+                        exps.push(exp);
+                        sum += exp;
+                    }
+                    let res = exps.iter().map(|t| t / sum);
+                    ::frameworks::native::helper::write_to_memory(result.get_mut(self.device()).unwrap(), res);
+                    return Ok(());
+                }
+                Err(Error::Plugin(
+                    PluginError::Operation("Unable to execute Native softmax Forward.")))
             }
             fn softmax_grad(
                 &self,
@@ -371,8 +383,10 @@ macro_rules! impl_ops_softmax_for {
                 x_diff: &mut ::co::tensor::SharedTensor<$t>,
                 result_diff: &mut ::co::tensor::SharedTensor<$t>
             ) -> Result<(), ::co::error::Error> {
-                unimplemented!();
-                Ok(())
+                match x.add_device(self.device()) { _ => try!(x.sync(self.device())) }
+                match x_diff.add_device(self.device()) { _ => try!(x_diff.sync(self.device())) }
+                match result_diff.add_device(self.device()) { _ => () }
+                self.softmax_grad_plain(x, x_diff, result_diff)
             }
             fn softmax_grad_plain(
                 &self,
@@ -380,8 +394,24 @@ macro_rules! impl_ops_softmax_for {
                 x_diff: &::co::tensor::SharedTensor<$t>,
                 result_diff: &mut ::co::tensor::SharedTensor<$t>
             ) -> Result<(), ::co::error::Error> {
-                unimplemented!();
-                Ok(())
+                if let Some(sig_data) = x.get(self.device()).unwrap().as_native() {
+                    if let Some(sig_dx) = x_diff.get(self.device()).unwrap().as_native() {
+                        let mut dot : $t = 0 as $t;
+                        let sig_data_slice = sig_data.as_slice::<$t>();
+                        let sig_dx_slice = sig_dx.as_slice::<$t>();
+                        for (t, dt) in sig_data_slice.iter().zip(sig_dx_slice.iter()) {
+                            dot += t * dt;
+                        }
+                        let res = sig_data_slice.iter()
+                            .zip(sig_dx_slice.iter())
+                            .map(|(t, dt)| t * (dt - dot));
+                        ::frameworks::native::helper::write_to_memory(result_diff.get_mut(self.device()).unwrap(), res);
+                        return Ok(());
+                    }
+                }
+                Err(Error::Plugin(
+                    PluginError::Operation("Unable to execute Native softmax Backward.")))
+            }
         }
     );
diff --git a/src/frameworks/native/mod.rs b/src/frameworks/native/mod.rs
index c70b967..80d34ef 100644
--- a/src/frameworks/native/mod.rs
+++ b/src/frameworks/native/mod.rs
@@ -31,7 +31,7 @@ impl_ops_sigmoid_for!(f32, Backend<Native>);
 impl_ops_relu_for!(f32, Backend<Native>);
 impl_ops_tanh_for!(f32, Backend<Native>);
 // impl_ops_convolution_for!(f32, Backend<Native>);
-// impl_ops_softmax_for!(f32, Backend<Native>);
+impl_ops_softmax_for!(f32, Backend<Native>);
 // impl_ops_lrn_for!(f32, Backend<Native>);
 // impl_ops_pooling_for!(f32, Backend<Native>);

@@ -48,6 +48,6 @@ impl_ops_sigmoid_for!(f64, Backend<Native>);
 impl_ops_relu_for!(f64, Backend<Native>);
 impl_ops_tanh_for!(f64, Backend<Native>);
 // impl_ops_convolution_for!(f64, Backend<Native>);
-// impl_ops_softmax_for!(f64, Backend<Native>);
+impl_ops_softmax_for!(f64, Backend<Native>);
 // impl_ops_lrn_for!(f64, Backend<Native>);
 // impl_ops_pooling_for!(f64, Backend<Native>);
diff --git a/tests/softmax_specs.rs b/tests/softmax_specs.rs
index 985bd3f..3977b33 100644
--- a/tests/softmax_specs.rs
+++ b/tests/softmax_specs.rs
@@ -215,185 +215,173 @@ mod softmax_spec_cuda {
 #[cfg(test)]
 #[cfg(feature = "native")]
 mod softmax_spec_native {
-    // use co::backend::{Backend, BackendConfig};
-    // use co::framework::IFramework;
-    // use co::frameworks::Native;
-    // use co_nn::*;
-    // use co::memory::MemoryType;
-    // use co::tensor::SharedTensor;
-    // use co::plugin::numeric_helpers::{cast, Float};
-    //
-    //
-    // fn get_native_backend() -> Backend<Native> {
-    //     let framework = Native::new();
-    //     let hardwares = framework.hardwares();
-    //     let backend_config = BackendConfig::new(framework, hardwares);
-    //     Backend::new(backend_config).unwrap()
-    // }
-    //
-    // fn write_to_memory<T: Copy>(mem: &mut MemoryType, data: &[T]) {
-    //     match mem {
-    //         &mut MemoryType::Native(ref mut mem) => {
-    //             let mut mem_buffer = mem.as_mut_slice::<T>();
-    //             for (index, datum) in data.iter().enumerate() {
-    //                 mem_buffer[index] = *datum;
-    //             }
-    //         },
-    //         #[cfg(any(feature = "opencl", feature = "cuda"))]
-    //         _ => {}
-    //     }
-    // }
-    //
-    // fn get_memory<T: Float>(backend: &Backend<Native>) -> (SharedTensor<T>, SharedTensor<T>){
-    //     let val = cast::<f64, T>(1f64).unwrap();
-    //     let mut x = SharedTensor::<T>::new(backend.device(), &(1, 1, 4)).unwrap();
-    //     write_to_memory(x.get_mut(backend.device()).unwrap(), &[val, val, val, val]);
-    //
-    //     let result = SharedTensor::<T>::new(backend.device(), &(1, 1, 4)).unwrap();
-    //
-    //     (x, result)
-    // }
-    //
-    // fn get_grad_memory<T: Float>(backend: &Backend<Native>) -> (SharedTensor<T>, SharedTensor<T>, SharedTensor<T>){
-    //     let val = cast::<f64, T>(1f64).unwrap();
-    //     let val2 = cast::<f64, T>(2f64).unwrap();
-    //     let mut x = SharedTensor::<T>::new(backend.device(), &(1, 1, 3)).unwrap();
-    //     write_to_memory(x.get_mut(backend.device()).unwrap(), &[val, val, val2]);
-    //
-    //     let mut x_diff = SharedTensor::<T>::new(backend.device(), &(1, 1, 3)).unwrap();
-    //     write_to_memory(x_diff.get_mut(backend.device()).unwrap(), &[val, val, val2]);
-    //
-    //     let result_diff = SharedTensor::<T>::new(backend.device(), &(1, 1, 3)).unwrap();
-    //
-    //     (x, x_diff, result_diff)
-    // }
-
-
-    // #[test]
-    // #[ignore]
-    // fn it_computes_correct_softmax_on_native_for_f32() {
-    //     let backend = get_native_backend();
-    //     let (mut x, mut result) = get_memory::<f32>(&backend);
-    //
-    //     match backend.softmax(&mut x, &mut result) {
-    //         Ok(_) => {
-    //             if let Some(mem) = result.get(backend.device()).unwrap().as_native() {
-    //                 assert_eq!(&[0.25f32, 0.25f32, 0.25f32, 0.25f32], mem.as_slice::<f32>());
-    //             }
-    //         },
-    //         Err(err) => { println!("{:?}", err); assert!(false) }
-    //     }
-    // }
-    //
-    // #[test]
-    // #[ignore]
-    // fn it_computes_correct_softmax_on_native_for_f64() {
-    //     let backend = get_native_backend();
-    //     let (mut x, mut result) = get_memory::<f64>(&backend);
-    //
-    //     match backend.softmax(&mut x, &mut result) {
-    //         Ok(_) => {
-    //             if let Some(mem) = result.get(backend.device()).unwrap().as_native() {
-    //                 assert_eq!(&[0.25f64, 0.25f64, 0.25f64, 0.25f64], mem.as_slice::<f64>());
-    //             }
-    //         },
-    //         Err(err) => { println!("{:?}", err); assert!(false) }
-    //     }
-    // }
-    //
-    // #[test]
-    // #[ignore]
-    // fn it_computes_correct_softmax_on_native_for_f32_plain() {
-    //     let backend = get_native_backend();
-    //     let (mut x, mut result) = get_memory::<f32>(&backend);
-    //
-    //     match backend.softmax_plain(&mut x, &mut result) {
-    //         Ok(_) => {
-    //             if let Some(mem) = result.get(backend.device()).unwrap().as_native() {
-    //                 assert_eq!(&[0.25f32, 0.25f32, 0.25f32, 0.25f32], mem.as_slice::<f32>());
-    //             }
-    //         },
-    //         Err(err) => { println!("{:?}", err); assert!(false) }
-    //     }
-    // }
-    //
-    // #[test]
-    // #[ignore]
-    // fn it_computes_correct_softmax_on_native_for_f64_plain() {
-    //     let backend = get_native_backend();
-    //     let (mut x, mut result) = get_memory::<f64>(&backend);
-    //
-    //     match backend.softmax_plain(&mut x, &mut result) {
-    //         Ok(_) => {
-    //             if let Some(mem) = result.get(backend.device()).unwrap().as_native() {
-    //                 assert_eq!(&[0.25f64, 0.25f64, 0.25f64, 0.25f64], mem.as_slice::<f64>());
-    //             }
-    //         },
-    //         Err(err) => { println!("{:?}", err); assert!(false) }
-    //     }
-    // }
-    //
-    // #[test]
-    // #[ignore]
-    // fn it_computes_correct_softmax_grad_on_native_for_f32() {
-    //     let backend = get_native_backend();
-    //     let (mut x, mut x_diff, mut result_diff) = get_grad_memory::<f32>(&backend);
-    //
-    //     match backend.softmax_grad(&mut x, &mut x_diff, &mut result_diff) {
-    //         Ok(_) => {
-    //             if let Some(mem) = result_diff.get(backend.device()).unwrap().as_native() {
-    //                 assert_eq!(&[-5f32, -5f32, -8f32], mem.as_slice::<f32>());
-    //             }
-    //         },
-    //         Err(err) => { println!("{:?}", err); assert!(false) }
-    //     }
-    // }
-    //
-    // #[test]
-    // #[ignore]
-    // fn it_computes_correct_softmax_grad_on_native_for_f64() {
-    //     let backend = get_native_backend();
-    //     let (mut x, mut x_diff, mut result_diff) = get_grad_memory::<f64>(&backend);
-    //
-    //     match backend.softmax_grad(&mut x, &mut x_diff, &mut result_diff) {
-    //         Ok(_) => {
-    //             if let Some(mem) = result_diff.get(backend.device()).unwrap().as_native() {
-    //                 assert_eq!(&[-5f64, -5f64, -8f64], mem.as_slice::<f64>());
-    //             }
-    //         },
-    //         Err(err) => { println!("{:?}", err); assert!(false) }
-    //     }
-    // }
-    //
-    // #[test]
-    // #[ignore]
-    // fn it_computes_correct_softmax_grad_on_native_for_f32_plain() {
-    //     let backend = get_native_backend();
-    //     let (mut x, mut x_diff, mut result_diff) = get_grad_memory::<f32>(&backend);
-    //
-    //     match backend.softmax_grad_plain(&mut x, &mut x_diff, &mut result_diff) {
-    //         Ok(_) => {
-    //             if let Some(mem) = result_diff.get(backend.device()).unwrap().as_native() {
-    //                 assert_eq!(&[-5f32, -5f32, -8f32], mem.as_slice::<f32>());
-    //             }
-    //         },
-    //         Err(err) => { println!("{:?}", err); assert!(false) }
-    //     }
-    // }
-    //
-    // #[test]
-    // #[ignore]
-    // fn it_computes_correct_softmax_grad_on_native_for_f64_plain() {
-    //     let backend = get_native_backend();
-    //     let (mut x, mut x_diff, mut result_diff) = get_grad_memory::<f64>(&backend);
-    //
-    //     match backend.softmax_grad_plain(&mut x, &mut x_diff, &mut result_diff) {
-    //         Ok(_) => {
-    //             if let Some(mem) = result_diff.get(backend.device()).unwrap().as_native() {
-    //                 assert_eq!(&[-5f64, -5f64, -8f64], mem.as_slice::<f64>());
-    //             }
-    //         },
-    //         Err(err) => { println!("{:?}", err); assert!(false) }
-    //     }
-    // }
+    use co::backend::{Backend, BackendConfig};
+    use co::framework::IFramework;
+    use co::frameworks::Native;
+    use co_nn::*;
+    use co::memory::MemoryType;
+    use co::tensor::SharedTensor;
+    use co::plugin::numeric_helpers::{cast, Float};
+
+
+    fn get_native_backend() -> Backend<Native> {
+        let framework = Native::new();
+        let hardwares = framework.hardwares();
+        let backend_config = BackendConfig::new(framework, hardwares);
+        Backend::new(backend_config).unwrap()
+    }
+
+    fn write_to_memory<T: Copy>(mem: &mut MemoryType, data: &[T]) {
+        let &mut MemoryType::Native(ref mut mem) = mem;
+        let mut mem_buffer = mem.as_mut_slice::<T>();
+        for (index, datum) in data.iter().enumerate() {
+            mem_buffer[index] = *datum;
+        }
+    }
+
+
+    fn get_memory<T: Float>(backend: &Backend<Native>) -> (SharedTensor<T>, SharedTensor<T>){
+        let val = cast::<f64, T>(1f64).unwrap();
+        let mut x = SharedTensor::<T>::new(backend.device(), &(1, 1, 4)).unwrap();
+        write_to_memory(x.get_mut(backend.device()).unwrap(), &[val, val, val, val]);
+
+        let result = SharedTensor::<T>::new(backend.device(), &(1, 1, 4)).unwrap();
+
+        (x, result)
+    }
+
+    fn get_grad_memory<T: Float>(backend: &Backend<Native>) -> (SharedTensor<T>, SharedTensor<T>, SharedTensor<T>){
+        let val = cast::<f64, T>(1f64).unwrap();
+        let val2 = cast::<f64, T>(2f64).unwrap();
+        let mut x = SharedTensor::<T>::new(backend.device(), &(1, 1, 3)).unwrap();
+        write_to_memory(x.get_mut(backend.device()).unwrap(), &[val, val, val2]);
+
+        let mut x_diff = SharedTensor::<T>::new(backend.device(), &(1, 1, 3)).unwrap();
+        write_to_memory(x_diff.get_mut(backend.device()).unwrap(), &[val, val, val2]);
+
+        let result_diff = SharedTensor::<T>::new(backend.device(), &(1, 1, 3)).unwrap();
+
+        (x, x_diff, result_diff)
+    }
+
+
+    #[test]
+    fn it_computes_correct_softmax_on_native_for_f32() {
+        let backend = get_native_backend();
+        let (mut x, mut result) = get_memory::<f32>(&backend);
+
+        match backend.softmax(&mut x, &mut result) {
+            Ok(_) => {
+                if let Some(mem) = result.get(backend.device()).unwrap().as_native() {
+                    assert_eq!(&[0.25f32, 0.25f32, 0.25f32, 0.25f32], mem.as_slice::<f32>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_softmax_on_native_for_f64() {
+        let backend = get_native_backend();
+        let (mut x, mut result) = get_memory::<f64>(&backend);
+
+        match backend.softmax(&mut x, &mut result) {
+            Ok(_) => {
+                if let Some(mem) = result.get(backend.device()).unwrap().as_native() {
+                    assert_eq!(&[0.25f64, 0.25f64, 0.25f64, 0.25f64], mem.as_slice::<f64>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_softmax_on_native_for_f32_plain() {
+        let backend = get_native_backend();
+        let (mut x, mut result) = get_memory::<f32>(&backend);
+
+        match backend.softmax_plain(&mut x, &mut result) {
+            Ok(_) => {
+                if let Some(mem) = result.get(backend.device()).unwrap().as_native() {
+                    assert_eq!(&[0.25f32, 0.25f32, 0.25f32, 0.25f32], mem.as_slice::<f32>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_softmax_on_native_for_f64_plain() {
+        let backend = get_native_backend();
+        let (mut x, mut result) = get_memory::<f64>(&backend);
+
+        match backend.softmax_plain(&mut x, &mut result) {
+            Ok(_) => {
+                if let Some(mem) = result.get(backend.device()).unwrap().as_native() {
+                    assert_eq!(&[0.25f64, 0.25f64, 0.25f64, 0.25f64], mem.as_slice::<f64>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_softmax_grad_on_native_for_f32() {
+        let backend = get_native_backend();
+        let (mut x, mut x_diff, mut result_diff) = get_grad_memory::<f32>(&backend);
+
+        match backend.softmax_grad(&mut x, &mut x_diff, &mut result_diff) {
+            Ok(_) => {
+                if let Some(mem) = result_diff.get(backend.device()).unwrap().as_native() {
+                    assert_eq!(&[-5f32, -5f32, -8f32], mem.as_slice::<f32>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_softmax_grad_on_native_for_f64() {
+        let backend = get_native_backend();
+        let (mut x, mut x_diff, mut result_diff) = get_grad_memory::<f64>(&backend);
+
+        match backend.softmax_grad(&mut x, &mut x_diff, &mut result_diff) {
+            Ok(_) => {
+                if let Some(mem) = result_diff.get(backend.device()).unwrap().as_native() {
+                    assert_eq!(&[-5f64, -5f64, -8f64], mem.as_slice::<f64>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_softmax_grad_on_native_for_f32_plain() {
+        let backend = get_native_backend();
+        let (mut x, mut x_diff, mut result_diff) = get_grad_memory::<f32>(&backend);
+
+        match backend.softmax_grad_plain(&mut x, &mut x_diff, &mut result_diff) {
+            Ok(_) => {
+                if let Some(mem) = result_diff.get(backend.device()).unwrap().as_native() {
+                    assert_eq!(&[-5f32, -5f32, -8f32], mem.as_slice::<f32>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_softmax_grad_on_native_for_f64_plain() {
+        let backend = get_native_backend();
+        let (mut x, mut x_diff, mut result_diff) = get_grad_memory::<f64>(&backend);
+
+        match backend.softmax_grad_plain(&mut x, &mut x_diff, &mut result_diff) {
+            Ok(_) => {
+                if let Some(mem) = result_diff.get(backend.device()).unwrap().as_native() {
+                    assert_eq!(&[-5f64, -5f64, -8f64], mem.as_slice::<f64>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
 }
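
Note: for reference, below is a minimal standalone sketch (not part of the patch) of the arithmetic the native kernels above implement, written against plain f32 slices rather than the collenchyma SharedTensor API; the helper names softmax and softmax_grad here are illustrative only. The forward pass normalizes exponentials, and the backward pass computes y_i * (dy_i - dot(y, dy)), where the kernel's `x` argument holds the softmax output and `x_diff` the incoming gradient. This reproduces the [0.25; 4] and [-5, -5, -8] expectations used in the tests.

// Standalone sketch of the math behind softmax_plain / softmax_grad_plain above.
// Illustrative only; names and signatures are not part of the plugin API.

fn softmax(input: &[f32]) -> Vec<f32> {
    // y_i = exp(x_i) / sum_j exp(x_j)
    let exps: Vec<f32> = input.iter().map(|v| v.exp()).collect();
    let sum: f32 = exps.iter().sum();
    exps.iter().map(|e| e / sum).collect()
}

fn softmax_grad(y: &[f32], dy: &[f32]) -> Vec<f32> {
    // dx_i = y_i * (dy_i - dot(y, dy)), matching the kernel's use of `x` as the
    // softmax output and `x_diff` as the incoming gradient.
    let dot: f32 = y.iter().zip(dy).map(|(a, b)| a * b).sum();
    y.iter().zip(dy).map(|(a, b)| a * (b - dot)).collect()
}

fn main() {
    // Forward test expectation: softmax of four equal inputs is 0.25 each.
    let fwd = softmax(&[1.0, 1.0, 1.0, 1.0]);
    assert!(fwd.iter().all(|v| (v - 0.25).abs() < 1e-6));

    // Grad test expectation: inputs [1, 1, 2] with gradient [1, 1, 2]
    // give dot = 6 and output [-5, -5, -8].
    let grad = softmax_grad(&[1.0, 1.0, 2.0], &[1.0, 1.0, 2.0]);
    assert_eq!(grad, vec![-5.0f32, -5.0, -8.0]);
}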