diff --git a/Cargo.toml b/Cargo.toml
index 5bd52a23b..96701f66a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,6 +13,8 @@ keywords = ["machine-learning", "linfa", "ai", "ml"]
 categories = ["algorithms", "mathematics", "science"]

 [dependencies]
+
+linfa-supervised = { path = "linfa-supervised" }
 linfa-clustering = { path = "linfa-clustering", version = "0.1" }

 [dev-dependencies]
@@ -22,7 +24,8 @@ rand_isaac = "0.2.0"
 ndarray-npy = { version = "0.5", default-features = false }

 [workspace]
-members = ["linfa-clustering"]
+members = ["linfa-clustering", "linfa-supervised"]

 [profile.release]
 opt-level = 3
+lto = true
\ No newline at end of file
diff --git a/linfa-supervised/Cargo.toml b/linfa-supervised/Cargo.toml
new file mode 100644
index 000000000..63614e10b
--- /dev/null
+++ b/linfa-supervised/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "linfa-supervised"
+version = "0.1.0"
+authors = ["Khalil HADJI "]
+edition = "2018"
+description = "A collection of supervised learning algorithms"
+license = "MIT/Apache-2.0"
+
+repository = "https://github.com/LukeMathWalker/linfa"
+
+keywords = ["machine-learning", "linfa", "regression", "linear", "ridge"]
+categories = ["algorithms", "mathematics", "science"]
+
+[dependencies]
+ndarray = { version = "0.13", features = ["rayon"] }
+ndarray-linalg = { version = "0.12", features = ["openblas"] }
\ No newline at end of file
diff --git a/linfa-supervised/examples/main.rs b/linfa-supervised/examples/main.rs
new file mode 100644
index 000000000..3020418a9
--- /dev/null
+++ b/linfa-supervised/examples/main.rs
@@ -0,0 +1,40 @@
+use linfa_supervised::LinearRegression;
+use linfa_supervised::RidgeRegression;
+use ndarray::array;
+
+fn linear_regression() {
+    let mut linear_regression = LinearRegression::new(false);
+    let x = array![[1.0], [2.0], [3.0], [4.0]];
+    let y = array![1.0, 2.0, 3.0, 4.0];
+    linear_regression.fit(&x, &y);
+    let x_hat = array![[6.0], [7.0]];
+    println!("{:#?}", linear_regression.predict(&x_hat));
+
+    let mut linear_regression2 = LinearRegression::new(true);
+    let x2 = array![[1.0], [2.0], [3.0], [4.0]];
+    let y2 = array![2.0, 3.0, 4.0, 5.0];
+    linear_regression2.fit(&x2, &y2);
+    let x2_hat = array![[6.0], [7.0]];
+    println!("{:#?}", linear_regression2.predict(&x2_hat));
+}
+
+fn ridge_regression() {
+    let mut ridge_regression = RidgeRegression::new(0.0);
+    let x = array![[1.0], [2.0], [3.0], [4.0]];
+    let y = array![1.0, 2.0, 3.0, 4.0];
+    ridge_regression.fit(&x, &y);
+    let x_hat = array![[6.0], [7.0]];
+    println!("{:#?}", ridge_regression.predict(&x_hat));
+
+    let mut ridge_regression2 = RidgeRegression::new(1.0);
+    let x2 = array![[1.0], [2.0], [3.0], [4.0]];
+    let y2 = array![2.0, 3.0, 4.0, 5.0];
+    ridge_regression2.fit(&x2, &y2);
+    let x2_hat = array![[6.0], [7.0]];
+    println!("{:#?}", ridge_regression2.predict(&x2_hat));
+}
+
+fn main() {
+    linear_regression();
+    ridge_regression();
+}
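Since each toy dataset in the example above lies exactly on a line, the printed predictions can be worked out by hand: the no-intercept fit of y = x recovers a slope of 1, and the intercept fit of y = x + 1 recovers intercept 1 and slope 1. A minimal sanity-check sketch of the two linear cases, assuming the API from this diff (comparisons use a tolerance since the solver returns floating-point results):

```rust
use linfa_supervised::LinearRegression;
use ndarray::array;

fn main() {
    // y = x through the origin: the no-intercept fit recovers slope 1,
    // so [[6.0], [7.0]] should map to approximately [6.0, 7.0].
    let mut no_intercept = LinearRegression::new(false);
    no_intercept.fit(&array![[1.0], [2.0], [3.0], [4.0]], &array![1.0, 2.0, 3.0, 4.0]);
    let p = no_intercept.predict(&array![[6.0], [7.0]]);
    assert!((p[0] - 6.0).abs() < 1e-9 && (p[1] - 7.0).abs() < 1e-9);

    // y = x + 1: with fit_intercept = true the model recovers intercept 1
    // and slope 1, so [[6.0], [7.0]] should map to approximately [7.0, 8.0].
    let mut with_intercept = LinearRegression::new(true);
    with_intercept.fit(&array![[1.0], [2.0], [3.0], [4.0]], &array![2.0, 3.0, 4.0, 5.0]);
    let p2 = with_intercept.predict(&array![[6.0], [7.0]]);
    assert!((p2[0] - 7.0).abs() < 1e-9 && (p2[1] - 8.0).abs() < 1e-9);
}
```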
diff --git a/linfa-supervised/src/lib.rs b/linfa-supervised/src/lib.rs
new file mode 100644
index 000000000..d6091f110
--- /dev/null
+++ b/linfa-supervised/src/lib.rs
@@ -0,0 +1,5 @@
+mod linear_regression;
+mod ridge_regression;
+
+pub use linear_regression::*;
+pub use ridge_regression::*;
diff --git a/linfa-supervised/src/linear_regression/algorithm.rs b/linfa-supervised/src/linear_regression/algorithm.rs
new file mode 100644
index 000000000..32fe16bfd
--- /dev/null
+++ b/linfa-supervised/src/linear_regression/algorithm.rs
@@ -0,0 +1,119 @@
+#![allow(non_snake_case)]
+use ndarray::{stack, Array, Array1, ArrayBase, Axis, Data, Ix1, Ix2};
+use ndarray_linalg::Solve;
+/* I will probably change the implementation to an enum for more type safety.
+I have to make sure it is a good idea when it comes to Python interoperability.
+enum Intercept {
+    NoIntercept,
+    Intercept(Array1<f64>)
+}
+pub struct LinearRegressor {
+    beta : Option<Array1<f64>>,
+    intercept : Intercept,
+}
+*/
+
+/*
+If fit_intercept is false, we assume that the regression passes through the origin.
+*/
+/*
+The simple linear regression model is
+    y = bX + e  where e ~ N(0, sigma^2 * I)
+In probabilistic terms this corresponds to
+    y - bX ~ N(0, sigma^2 * I)
+    y | X, b ~ N(bX, sigma^2 * I)
+The loss for the model is simply the squared error between the model
+predictions and the true values:
+    Loss = ||y - bX||^2
+The maximum likelihood estimate of the model parameter `beta` can be computed
+in closed form via the normal equation:
+    b = (X^T X)^{-1} X^T y
+where (X^T X)^{-1} X^T is known as the pseudoinverse or Moore-Penrose inverse.
+*/
+pub struct LinearRegression {
+    beta: Option<Array1<f64>>,
+    fit_intercept: bool,
+}
+
+impl LinearRegression {
+    pub fn new(fit_intercept: bool) -> LinearRegression {
+        LinearRegression {
+            beta: None,
+            fit_intercept,
+        }
+    }
+    /* Instead of assert_eq we should probably return a Result; we first need a generic error type shared by all algorithms. */
+    pub fn fit<A, B>(&mut self, X: &ArrayBase<A, Ix2>, Y: &ArrayBase<B, Ix1>)
+    where
+        A: Data<Elem = f64>,
+        B: Data<Elem = f64>,
+    {
+        let (n_samples, _) = X.dim();
+
+        // We have to make sure that the dimensions match
+        assert_eq!(Y.dim(), n_samples);
+
+        self.beta = if self.fit_intercept {
+            let dummy_column: Array<f64, Ix2> = Array::ones((n_samples, 1));
+            /*
+            if X has 2 features and 3 samples
+            X = [[1,2]
+                ,[3,4]
+                ,[5,6]]
+            dummy_column = [[1]
+                           ,[1]
+                           ,[1]]
+            */
+            let X_with_ones = stack(Axis(1), &[dummy_column.view(), X.view()]).unwrap();
+            Some(LinearRegression::fit_beta(&X_with_ones, Y))
+        } else {
+            Some(LinearRegression::fit_beta(X, Y))
+        }
+    }
+    fn fit_beta<A, B>(X: &ArrayBase<A, Ix2>, y: &ArrayBase<B, Ix1>) -> Array1<f64>
+    where
+        A: Data<Elem = f64>,
+        B: Data<Elem = f64>,
+    {
+        let rhs = X.t().dot(y);
+        let linear_operator = X.t().dot(X);
+        linear_operator.solve_into(rhs).unwrap()
+    }
+
+    pub fn predict<A>(&self, X: &ArrayBase<A, Ix2>) -> Array1<f64>
+    where
+        A: Data<Elem = f64>,
+    {
+        let (n_samples, _) = X.dim();
+
+        // If we are fitting the intercept, we need an additional column
+        if self.fit_intercept {
+            let dummy_column: Array<f64, Ix2> = Array::ones((n_samples, 1));
+            let X = stack(Axis(1), &[dummy_column.view(), X.view()]).unwrap();
+            match &self.beta {
+                None => panic!("The linear regression estimator has to be fitted first!"),
+                Some(beta) => X.dot(beta),
+            }
+        } else {
+            match &self.beta {
+                None => panic!("The linear regression estimator has to be fitted first!"),
+                Some(beta) => X.dot(beta),
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use ndarray::array;
+    #[test]
+    fn linear_regression_test() {
+        let mut linear_regression = LinearRegression::new(false);
+        let x = array![[1.0], [2.0], [3.0], [4.0]];
+        let y = array![1.0, 2.0, 3.0, 4.0];
+        linear_regression.fit(&x, &y);
+        let x_hat = array![[6.0]];
+        assert_eq!(linear_regression.predict(&x_hat), array![6.0])
+    }
+}
diff --git a/linfa-supervised/src/linear_regression/mod.rs b/linfa-supervised/src/linear_regression/mod.rs
new file mode 100644
index 000000000..4f6742108
--- /dev/null
+++ b/linfa-supervised/src/linear_regression/mod.rs
@@ -0,0 +1,3 @@
+mod algorithm;
+
+pub use algorithm::*;
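The normal equation in the module comment can be checked by hand on the data from `linear_regression_test`: with x = [1, 2, 3, 4] and y = x, X^T X is the 1x1 matrix [30] and X^T y = [30], so 30 * b = 30 gives b = 1. A standalone sketch of that computation with `ndarray-linalg`'s `Solve` trait (illustrative only, not part of the patch):

```rust
use ndarray::array;
use ndarray_linalg::Solve;

fn main() {
    // Same data as linear_regression_test: y = x, no intercept.
    let x = array![[1.0], [2.0], [3.0], [4.0]];
    let y = array![1.0, 2.0, 3.0, 4.0];

    // Normal equation: (X^T X) b = X^T y, here 30 * b = 30.
    let xtx = x.t().dot(&x);
    let xty = x.t().dot(&y);
    let beta = xtx.solve_into(xty).unwrap();
    assert!((beta[0] - 1.0).abs() < 1e-12);
}
```

Note that forming X^T X squares the condition number of X, so for badly conditioned design matrices a QR or SVD based least-squares solve is numerically preferable to the normal equation.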
diff --git a/linfa-supervised/src/ridge_regression/algorithm.rs b/linfa-supervised/src/ridge_regression/algorithm.rs
new file mode 100644
index 000000000..55ee32f9a
--- /dev/null
+++ b/linfa-supervised/src/ridge_regression/algorithm.rs
@@ -0,0 +1,44 @@
+#![allow(non_snake_case)]
+use ndarray::{Array, Array1, ArrayBase, Data, Ix1, Ix2};
+use ndarray_linalg::Solve;
+/* The difference between linear regression and ridge regression is
+   that ridge regression adds an L2 penalisation term, which keeps some features
+   from "taking all the credit" for the output. It is also a way to deal with over-fitting by adding bias.
+   Some details ...
+   b = (X^T X + aI)^{-1} X^T y with a being the regularisation/penalisation term
+*/
+
+pub struct RidgeRegression {
+    beta: Option<Array1<f64>>,
+    alpha: f64,
+}
+
+impl RidgeRegression {
+    pub fn new(alpha: f64) -> RidgeRegression {
+        RidgeRegression {
+            beta: None,
+            alpha,
+        }
+    }
+
+    pub fn fit<A, B>(&mut self, X: &ArrayBase<A, Ix2>, Y: &ArrayBase<B, Ix1>)
+    where
+        A: Data<Elem = f64>,
+        B: Data<Elem = f64>,
+    {
+        let second_term = X.t().dot(Y);
+        let (_, identity_size) = X.dim();
+        let linear_operator = X.t().dot(X) + self.alpha * Array::eye(identity_size);
+        self.beta = Some(linear_operator.solve_into(second_term).unwrap());
+    }
+
+    pub fn predict<A>(&self, X: &ArrayBase<A, Ix2>) -> Array1<f64>
+    where
+        A: Data<Elem = f64>,
+    {
+        match &self.beta {
+            None => panic!("The ridge regression estimator has to be fitted first!"),
+            Some(beta) => X.dot(beta),
+        }
+    }
+}
diff --git a/linfa-supervised/src/ridge_regression/mod.rs b/linfa-supervised/src/ridge_regression/mod.rs
new file mode 100644
index 000000000..4f6742108
--- /dev/null
+++ b/linfa-supervised/src/ridge_regression/mod.rs
@@ -0,0 +1,3 @@
+mod algorithm;
+
+pub use algorithm::*;
diff --git a/linfa-supervised/src/utils.rs b/linfa-supervised/src/utils.rs
new file mode 100644
index 000000000..2d727f391
--- /dev/null
+++ b/linfa-supervised/src/utils.rs
@@ -0,0 +1,6 @@
+trait GradientDescent {
+    fn gradient();
+
+    fn optimise() {}
+
+}
diff --git a/src/lib.rs b/src/lib.rs
index 40c332e6e..b94b1a324 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -32,3 +32,7 @@
 pub mod clustering {
     pub use linfa_clustering::*;
 }
+
+pub mod supervised {
+    pub use linfa_supervised::*;
+}
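The effect of the ridge penalty can also be verified numerically: on the example data, ridge solves (X^T X + alpha * I) b = X^T y, i.e. (30 + alpha) b = 30, so the coefficient shrinks from 1 towards 0 as alpha grows. A short sketch against this diff's `RidgeRegression` API (the assertions are illustrative and use a floating-point tolerance):

```rust
use linfa_supervised::RidgeRegression;
use ndarray::array;

fn main() {
    let x = array![[1.0], [2.0], [3.0], [4.0]];
    let y = array![1.0, 2.0, 3.0, 4.0];

    // alpha = 0: the penalty vanishes, beta = 30 / 30 = 1, so predict(6) = 6
    // and ridge coincides with the unpenalised linear regression.
    let mut unpenalised = RidgeRegression::new(0.0);
    unpenalised.fit(&x, &y);
    let p0 = unpenalised.predict(&array![[6.0]]);
    assert!((p0[0] - 6.0).abs() < 1e-9);

    // alpha = 1: beta = 30 / 31, so the prediction shrinks below 6.
    let mut penalised = RidgeRegression::new(1.0);
    penalised.fit(&x, &y);
    let p1 = penalised.predict(&array![[6.0]]);
    assert!(p1[0] < 6.0);
    assert!((p1[0] - 6.0 * 30.0 / 31.0).abs() < 1e-9);
}
```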