docs/core: add documentation for network initialization, solver, layer
Also renamed ParamConfig -> WeightConfig

REFERENCES #17
hobofan authored and MichaelHirn committed Nov 10, 2015
1 parent a26e885 commit 0b129e3
Showing 15 changed files with 384 additions and 280 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -14,7 +14,7 @@ keywords = ["deep-learning", "neural-networks", "machine-learning", "framework"]
license = "MIT"

[dependencies]
phloem = "0.2.2"
phloem = "0.2.3"
rblas = "0.0.9"
log = "0.3.2"
clippy = "0.0.22"
133 changes: 83 additions & 50 deletions src/layer.rs
@@ -1,6 +1,6 @@
use math::*;
use phloem::{Blob, Numeric};
use shared_memory::{HeapBlob, ArcLock};
use shared_memory::{ArcLock, HeapBlob};
use layers::*;
use std::fmt;

@@ -69,70 +69,106 @@ pub type WriteBlob<'_> = RwLockWriteGuard<'_, HeapBlob>;
pub struct Layer<'a> {
/// The configuration of the Layer
pub config: Box<&'a LayerConfig>,
/// The Layer Interface
/// The [implementation][1] of the Layer.
/// [1]: ../layers/index.html
///
/// This is the part that does most of the work ([forward][2]/[backward][3]).
/// [2]: ./trait.ILayer.html#method.forward
/// [3]: ./trait.ILayer.html#method.backward
pub worker: Box<ILayer>,

/// The vector that indicates whether each top blob has a non-zero weight in
/// the objective function.
/// The vector that indicates whether each top blob contributes to
/// the [loss][1] of the network and with which weight.
/// [1]: http://caffe.berkeleyvision.org/tutorial/loss.html
loss: Vec<f32>,

/// The vector that stores shared references to the parameters in the form of blobs.
/// The vector that stores shared references to the weights in the form of blobs.
pub blobs: Vec<ArcLock<HeapBlob>>,

/// Vector indicating whether to compute the diff of each param blob.
param_propagate_down: Vec<bool>,
/// Vector indicating whether to compute the diff of each weight blob.
///
/// You can safely ignore false values and always compute gradients
/// for all weights, but possibly with wasteful computation.
///
/// Can be used by some [Layer implementations][1] to optimize performance.
/// [1]: ../layers/index.html
weight_propagate_down: Vec<bool>,
}

impl<'a> Layer<'a> {

/// Creates a new Layer from a LayerConfig
/// Creates a new Layer from a [LayerConfig][1].
/// [1]: ./struct.LayerConfig.html
///
/// Used during [Network][2] initialization.
///
/// [2]: ../network/struct.Network.html
pub fn from_config(config: &'a LayerConfig) -> Layer {
let cl = config.clone();
let cfg = Box::<&'a LayerConfig>::new(cl);
Layer {
loss: Vec::new(),
blobs: Vec::new(),

param_propagate_down: Vec::new(),
weight_propagate_down: Vec::new(),

worker: Layer::worker_from_config(&cfg),
config: cfg,
}
}

/// Helper for [from_config][1] to match a [LayerType][2] to its [implementation][3].
/// [1]: #method.from_config
/// [2]: ./enum.LayerType.html
/// [3]: ../layers/index.html
fn worker_from_config(config: &LayerConfig) -> Box<ILayer> {
match config.layer_type {
LayerType::Sigmoid => Box::new(Sigmoid),
}
}
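A minimal usage sketch (not part of this commit) showing how these two pieces fit together; it relies only on `LayerConfig::new`, `LayerType::Sigmoid`, and `Layer::from_config` as defined in this file, and the layer name "sig" is arbitrary:

// Hedged example: build a Sigmoid layer from a config.
let cfg = LayerConfig::new("sig".to_owned(), LayerType::Sigmoid);
let layer = Layer::from_config(&cfg);
// `layer.worker` now holds the boxed Sigmoid implementation of ILayer.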

/// Sets whether the layer should compute gradients w.r.t. a
/// parameter at a particular index given by param_id.
pub fn set_param_propagate_down(&mut self, param_id: usize, value: bool) {
if self.param_propagate_down.len() <= param_id {
self.param_propagate_down.resize(param_id + 1, true);
/// weight at a particular index given by `weight_id`.
///
/// See [`weight_propagate_down`][1].
/// [1]: ./struct.Layer.html
pub fn set_weight_propagate_down(&mut self, weight_id: usize, value: bool) {
if self.weight_propagate_down.len() <= weight_id {
self.weight_propagate_down.resize(weight_id + 1, true);
}
self.param_propagate_down[param_id] = value;
self.weight_propagate_down[weight_id] = value;

}

/// Returns the loss
pub fn loss(&self, id: usize) -> Option<&f32> {
self.loss.get(id)
/// Returns the [loss weight][1] associated with the weight blob
/// with id `weight_id`.
/// [1]: http://caffe.berkeleyvision.org/tutorial/loss.html
pub fn loss(&self, weight_id: usize) -> Option<&f32> {
self.loss.get(weight_id)
}
}

/// A Layer in a Neural Network that can handle forward and backward of a computation step.
/// A Layer in a [Neural Network][1] that can handle forward and backward of a computation step.
/// [1]: ../network/index.html
pub trait ILayer {
/// Compute the layer output.
/// Compute the [feedforward][1] layer output.
/// Uses the CPU.
/// [1]: https://en.wikipedia.org/wiki/Feedforward_neural_network
fn forward_cpu(&self, bottom: &[ReadBlob], top: &mut Vec<&mut WriteBlob>);
/// Compute the gradients for the bottom blobs
/// if the corresponding value of propagate_down is true.
/// Uses the CPU.
fn backward_cpu(&self, top: &[HeapBlob], propagate_down: &[bool], bottom: &mut Vec<HeapBlob>);
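As a hedged sketch (not part of this commit), an `ILayer` implementor might honor the `propagate_down` flags roughly like this; the gradient math itself is elided:

fn backward_cpu(&self, top: &[HeapBlob], propagate_down: &[bool], bottom: &mut Vec<HeapBlob>) {
    for (i, bottom_blob) in bottom.iter_mut().enumerate() {
        // A `false` flag means no gradient is needed for this bottom blob.
        if !propagate_down.get(i).cloned().unwrap_or(true) {
            continue;
        }
        // ... compute the gradient w.r.t. `bottom_blob` from `top` here ...
    }
}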

/// Compute the layer output using the currently set computation method (CPU).
/// Compute the [feedforward][1] layer output using the currently set computation method.
/// [1]: https://en.wikipedia.org/wiki/Feedforward_neural_network
///
/// Acquires read locks for the bottom blobs ([ReadBlob][2])
/// and write locks for the top blobs ([WriteBlob][3]) to ensure sequential computation,
/// and then passes them to the computation-method-specific function ([forward_cpu][4]).
///
/// [2]: ./type.ReadBlob.html
/// [3]: ./type.WriteBlob.html
/// [4]: #method.forward_cpu
fn forward(&self, bottom: &[ArcLock<HeapBlob>], top: &mut Vec<ArcLock<HeapBlob>>) -> f32 {
// Lock();
// Reshape(bottom, top); // Reshape the layer to fit top & bottom blob
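The remainder of the default `forward` body is collapsed in this diff. Assuming `ArcLock<T>` wraps an `Arc<RwLock<T>>`, the locking described above boils down to something like the following sketch (illustrative only, not the folded code):

let bottom_guards: Vec<_> = bottom.iter().map(|b| b.read().unwrap()).collect();
let mut top_guards: Vec<_> = top.iter().map(|t| t.write().unwrap()).collect();
let mut top_refs: Vec<_> = top_guards.iter_mut().collect();
self.forward_cpu(&bottom_guards, &mut top_refs);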
@@ -214,21 +250,20 @@ impl fmt::Debug for ILayer {
#[derive(Debug)]
/// Layer Configuration Struct
pub struct LayerConfig {
/// The Name of the Layer
/// The name of the Layer
pub name: String,

/// The type of the Layer
layer_type: LayerType,

/// The Name for each top Blob
/// The name for each top Blob
tops: Vec<String>,

/// The Name for each bottom Blob
/// The name for each bottom Blob
bottoms: Vec<String>,

/// Specifies training parameters (multipliers on global learning constants,
/// and the name and other settings used for weight sharing).
params: Vec<ParamConfig>,
/// Specifies training configuration for each weight blob.
params: Vec<WeightConfig>,

/// Specifies on which bottoms the backpropagation should be skipped.
/// The size must be either 0 or equal to the number of bottoms.
@@ -243,7 +278,6 @@ pub enum LayerType {
}

impl LayerConfig {

/// Creates a new LayerConfig
pub fn new(name: String, layer_type: LayerType) -> LayerConfig {
LayerConfig {
@@ -278,8 +312,8 @@ impl LayerConfig {
self.bottoms.len()
}

/// Returns the requested ParamConfig
pub fn param(&self, param_id: usize) -> Option<&ParamConfig> {
/// Returns the requested WeightConfig
pub fn param(&self, param_id: usize) -> Option<&WeightConfig> {
self.params.get(param_id)
}

@@ -288,19 +322,18 @@ impl LayerConfig {
self.params.len()
}

/// Checks if propagate down length works out
/// Checks if propagate down length is sane
pub fn check_propagate_down_len(&self) -> bool {
self.propagate_down.is_empty() || self.propagate_down.len() == self.bottoms.len()
}
}


#[derive(Debug)]
/// Specifies training parameters (multipliers on global learning constants,
/// and the name and other settings used for weight sharing).
pub struct ParamConfig {
/// The names of the parameter blobs -- useful for sharing parameters among
/// layers, but never required otherwise. To share a parameter between two
/// Specifies training configuration for a weight blob.
pub struct WeightConfig {
/// The name of the weight blob -- useful for sharing weights among
/// layers, but never required otherwise. To share a weight between two
/// layers, give it a (non-empty) name.
///
/// Default: ""
@@ -322,9 +355,9 @@ pub struct ParamConfig {
pub decay_mult: Option<f32>,
}

impl Default for ParamConfig {
fn default() -> ParamConfig {
ParamConfig {
impl Default for WeightConfig {
fn default() -> WeightConfig {
WeightConfig {
name: "".to_owned(),
share_mode: DimCheckMode::Strict,
lr_mult: None,
@@ -333,9 +366,9 @@ impl Default for ParamConfig {
}
}

impl ParamConfig {
/// Checks dimensions of two blobs according to the share_mode.
/// Logs an error if there is a count/shape mismatch.
impl WeightConfig {
/// Checks dimensions of two blobs according to the `share_mode`.
/// Returns an error if there is a count/shape mismatch.
pub fn check_dimensions<T: Numeric>(&self,
blob_one: &Blob<T>,
blob_two: &Blob<T>,
@@ -347,10 +380,10 @@ impl ParamConfig {
// Permissive dimension checking -- only check counts are the same.
DimCheckMode::Permissive => {
if blob_one.capacity() != blob_two.capacity() {
return Err(format!("Cannot share param '{}' owned by layer '{}' with layer '{}';
return Err(format!("Cannot share weight '{}' owned by layer '{}' with layer '{}';
count mismatch.
Owner layer param shape is {};
Sharing layer param shape is {}",
Owner layer weight shape is {};
Sharing layer weight shape is {}",
param_name,
owner_name,
layer_name,
@@ -361,10 +394,10 @@ impl ParamConfig {
// Strict dimension checking -- all dims must be the same.
DimCheckMode::Strict => {
if blob_one.shape() != blob_two.shape() {
return Err(format!("Cannot share param '{}' owned by layer '{}' with layer '{}';
return Err(format!("Cannot share weight '{}' owned by layer '{}' with layer '{}';
shape mismatch.
Owner layer param shape is {};
Sharing layer expects param shape {}",
Owner layer weight shape is {};
Sharing layer expects weight shape {}",
param_name,
owner_name,
layer_name,
@@ -376,15 +409,15 @@ impl ParamConfig {
Ok(())
}

/// The multiplier on the global learning rate for this parameter.
/// The multiplier on the global learning rate for this weight blob.
pub fn lr_mult(&self) -> f32 {
match self.lr_mult {
Some(val) => val,
None => 1.0f32,
}
}

/// The multiplier on the global weight decay for this parameter.
/// The multiplier on the global weight decay for this weight blob.
pub fn decay_mult(&self) -> f32 {
match self.decay_mult {
Some(val) => val,
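For context, a hedged sketch of how a solver might consume these two multipliers; `global_lr`, `global_weight_decay`, and `weight_cfg` are hypothetical names, only `lr_mult()` and `decay_mult()` come from the code above:

// Per-blob hyperparameters derived from the global ones.
let local_lr = global_lr * weight_cfg.lr_mult();                 // multiplier defaults to 1.0
let local_decay = global_weight_decay * weight_cfg.decay_mult(); // multiplier defaults to 1.0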
15 changes: 10 additions & 5 deletions src/layers/activation/mod.rs
@@ -1,12 +1,17 @@
//! Provides nonlinear activation methods.
//!
//! Activation Layers take a bottom Blob, provide the activation operation and produce a top Blob.
//! Thanks to the nonlinearity of the activation methods, we can 'learn' and detect nonlinearities
//! Activation Layers take a bottom Blob, provide the activation operation and
//! produce a top Blob.
//! Thanks to the nonlinearity of the activation methods, we can 'learn' and
//! detect nonlinearities
//! in our (complex) datasets.
//!
//! The activation operation used should depend on the task at hand. For binary classification a
//! step function might be very useful. For more complex tasks continuous activation functions such
//! as Sigmoid, TanH, Softmax or ReLU should be used. In most cases ReLU might prove the best
//! The activation operation used should depend on the task at hand. For binary
//! classification a
//! step function might be very useful. For more complex tasks continuous
//! activation functions such
//! as Sigmoid, TanH, Softmax or ReLU should be used. In most cases ReLU might
//! prove the best
//! results.
//!
//! The activation function is also sometimes called transfer function.
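For illustration only (not part of this crate), the step and sigmoid activations mentioned above can be written as plain Rust functions:

// Binary step: a simple choice for binary classification.
fn step(z: f32) -> f32 {
    if z >= 0.0 { 1.0 } else { 0.0 }
}

// Sigmoid: a continuous, differentiable activation.
fn sigmoid(z: f32) -> f32 {
    1.0 / (1.0 + (-z).exp())
}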
9 changes: 5 additions & 4 deletions src/layers/activation/sigmoid.rs
@@ -7,10 +7,11 @@
//!
//! ReLu, compared to Sigmoid
//!
//! * reduces the likelihood of vanishing gradients
//! * increases the likelihood of a more beneficial sparse representation
//! * can be computed faster
//! * is therefore the most popular activation function in DNNs as of this writing (2015).
//! * reduces the likelihood of vanishing gradients
//! * increases the likelihood of a more beneficial sparse representation
//! * can be computed faster
//! * is therefore the most popular activation function in DNNs as of this
//! writing (2015).
use shared_memory::*;
use layer::*;

4 changes: 2 additions & 2 deletions src/layers/common/convolution.rs
@@ -1,7 +1,7 @@
//! Convolves the top Blob
//!
//! Does this convolution with a set of learnable filters, each producing one feature map in the
//! top Blob.
//! Does this convolution with a set of learnable filters, each producing one
//! feature map in the top Blob.
#[derive(Debug, Copy, Clone)]
/// Convolution Layer
4 changes: 2 additions & 2 deletions src/layers/common/mod.rs
@@ -1,7 +1,7 @@
//! Provides common neural network layers.
//!
//! For now the layers in common should be described as layers that are typical layers for building
//! neural networks but are not activation or loss layers.
//! For now the layers in common should be described as layers that are typical
//! layers for building neural networks but are not activation or loss layers.
pub use self::convolution::Convolution;

pub mod convolution;
4 changes: 2 additions & 2 deletions src/layers/loss/softmax.rs
@@ -1,7 +1,7 @@
//! Computes the multinomial logistic loss of the softmax of its bottom Blob.
//!
//! This is conceptually identical to a softmax layer followed by a multinomial logistic loss
//! layer, but provides a more numerically stable gradient.
//! This is conceptually identical to a softmax layer followed by a multinomial
//! logistic loss layer, but provides a more numerically stable gradient.
#[derive(Debug, Copy, Clone)]
/// Softmax Loss Layer
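The numerical-stability claim rests on folding the softmax into the log-loss via the log-sum-exp trick; a standalone, hedged sketch (not this layer's implementation):

// Stable log-softmax: subtract the max before exponentiating so exp() cannot overflow.
fn log_softmax(scores: &[f32]) -> Vec<f32> {
    let max = scores.iter().cloned().fold(std::f32::NEG_INFINITY, f32::max);
    let log_sum = scores.iter().map(|s| (s - max).exp()).sum::<f32>().ln() + max;
    scores.iter().map(|s| s - log_sum).collect()
}
// The multinomial logistic loss for the true class `c` is then -log_softmax(scores)[c].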
