diff --git a/Cargo.toml b/Cargo.toml
index 5bd52a23b..96701f66a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,6 +13,8 @@ keywords = ["machine-learning", "linfa", "ai", "ml"]
 categories = ["algorithms", "mathematics", "science"]

 [dependencies]
+
+linfa-supervised = { path = "linfa-supervised" }
 linfa-clustering = { path = "linfa-clustering", version = "0.1" }

 [dev-dependencies]
@@ -22,7 +24,8 @@ rand_isaac = "0.2.0"
 ndarray-npy = { version = "0.5", default-features = false }

 [workspace]
-members = ["linfa-clustering"]
+members = ["linfa-clustering", "linfa-supervised"]

 [profile.release]
 opt-level = 3
+lto = true
\ No newline at end of file
diff --git a/linfa-supervised/Cargo.toml b/linfa-supervised/Cargo.toml
new file mode 100644
index 000000000..63614e10b
--- /dev/null
+++ b/linfa-supervised/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "linfa-supervised"
+version = "0.1.0"
+authors = ["Khalil HADJI "]
+edition = "2018"
+description = "A collection of supervised learning algorithms"
+license = "MIT/Apache-2.0"
+
+repository = "https://github.com/LukeMathWalker/linfa"
+
+keywords = ["machine-learning", "linfa", "regression", "linear", "ridge"]
+categories = ["algorithms", "mathematics", "science"]
+
+[dependencies]
+ndarray = { version = "0.13", features = ["rayon"] }
+ndarray-linalg = { version = "0.12", features = ["openblas"] }
\ No newline at end of file
diff --git a/linfa-supervised/examples/main.rs b/linfa-supervised/examples/main.rs
new file mode 100644
index 000000000..3020418a9
--- /dev/null
+++ b/linfa-supervised/examples/main.rs
@@ -0,0 +1,40 @@
+use linfa_supervised::LinearRegression;
+use linfa_supervised::RidgeRegression;
+use ndarray::array;
+
+fn linear_regression() {
+    let mut linear_regression = LinearRegression::new(false);
+    let x = array![[1.0], [2.0], [3.0], [4.0]];
+    let y = array![1.0, 2.0, 3.0, 4.0];
+    linear_regression.fit(&x, &y);
+    let x_hat = array![[6.0], [7.0]];
+    println!("{:#?}", linear_regression.predict(&x_hat));
+
+    let mut linear_regression2 = LinearRegression::new(true);
+    let x2 = array![[1.0], [2.0], [3.0], [4.0]];
+    let y2 = array![2.0, 3.0, 4.0, 5.0];
+    linear_regression2.fit(&x2, &y2);
+    let x2_hat = array![[6.0], [7.0]];
+    println!("{:#?}", linear_regression2.predict(&x2_hat));
+}
+
+fn ridge_regression() {
+    let mut ridge_regression = RidgeRegression::new(0.0);
+    let x = array![[1.0], [2.0], [3.0], [4.0]];
+    let y = array![1.0, 2.0, 3.0, 4.0];
+    ridge_regression.fit(&x, &y);
+    let x_hat = array![[6.0], [7.0]];
+    println!("{:#?}", ridge_regression.predict(&x_hat));
+
+    let mut ridge_regression2 = RidgeRegression::new(1.0);
+    let x2 = array![[1.0], [2.0], [3.0], [4.0]];
+    let y2 = array![2.0, 3.0, 4.0, 5.0];
+    ridge_regression2.fit(&x2, &y2);
+    let x2_hat = array![[6.0], [7.0]];
+    println!("{:#?}", ridge_regression2.predict(&x2_hat));
+}
+
+fn main() {
+    linear_regression();
+    ridge_regression();
+}
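Since each toy dataset in the example above lies exactly on a line, the printed predictions can be worked out by hand: the no-intercept fit of y = x recovers a slope of 1, and the intercept fit of y = x + 1 recovers intercept 1 and slope 1. A minimal sanity-check sketch of the two linear cases, assuming the API from this diff (comparisons use a tolerance since the solver returns floating-point results):

```rust
use linfa_supervised::LinearRegression;
use ndarray::array;

fn main() {
    // y = x through the origin: the no-intercept fit recovers slope 1,
    // so [[6.0], [7.0]] should map to approximately [6.0, 7.0].
    let mut no_intercept = LinearRegression::new(false);
    no_intercept.fit(&array![[1.0], [2.0], [3.0], [4.0]], &array![1.0, 2.0, 3.0, 4.0]);
    let p = no_intercept.predict(&array![[6.0], [7.0]]);
    assert!((p[0] - 6.0).abs() < 1e-9 && (p[1] - 7.0).abs() < 1e-9);

    // y = x + 1: with fit_intercept = true the model recovers intercept 1
    // and slope 1, so [[6.0], [7.0]] should map to approximately [7.0, 8.0].
    let mut with_intercept = LinearRegression::new(true);
    with_intercept.fit(&array![[1.0], [2.0], [3.0], [4.0]], &array![2.0, 3.0, 4.0, 5.0]);
    let p2 = with_intercept.predict(&array![[6.0], [7.0]]);
    assert!((p2[0] - 7.0).abs() < 1e-9 && (p2[1] - 8.0).abs() < 1e-9);
}
```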
diff --git a/linfa-supervised/src/lib.rs b/linfa-supervised/src/lib.rs
new file mode 100644
index 000000000..d6091f110
--- /dev/null
+++ b/linfa-supervised/src/lib.rs
@@ -0,0 +1,5 @@
+mod linear_regression;
+mod ridge_regression;
+
+pub use linear_regression::*;
+pub use ridge_regression::*;
diff --git a/linfa-supervised/src/linear_regression/algorithm.rs b/linfa-supervised/src/linear_regression/algorithm.rs
new file mode 100644
index 000000000..32fe16bfd
--- /dev/null
+++ b/linfa-supervised/src/linear_regression/algorithm.rs
@@ -0,0 +1,119 @@
+#![allow(non_snake_case)]
+use ndarray::{stack, Array, Array1, ArrayBase, Axis, Data, Ix1, Ix2};
+use ndarray_linalg::Solve;
+/* I will probably change the implementation to an enum for more type safety.
+I have to make sure it is a good idea when it comes to Python interoperability.
+enum Intercept {
+    NoIntercept,
+    Intercept(Array1<f64>)
+}
+pub struct LinearRegressor {
+    beta : Option<Array1<f64>>,
+    intercept : Intercept,
+}
+*/
+
+/*
+If fit_intercept is false, we assume that the regression passes through the origin.
+*/
+/*
+The simple linear regression model is
+    y = bX + e  where e ~ N(0, sigma^2 * I)
+In probabilistic terms this corresponds to
+    y - bX ~ N(0, sigma^2 * I)
+    y | X, b ~ N(bX, sigma^2 * I)
+The loss for the model is simply the squared error between the model
+predictions and the true values:
+    Loss = ||y - bX||^2
+The maximum likelihood estimate of the model parameter `beta` can be computed
+in closed form via the normal equation:
+    b = (X^T X)^{-1} X^T y
+where (X^T X)^{-1} X^T is known as the pseudoinverse or Moore-Penrose inverse.
+*/
+pub struct LinearRegression {
+    beta: Option<Array1<f64>>,
+    fit_intercept: bool,
+}
+
+impl LinearRegression {
+    pub fn new(fit_intercept: bool) -> LinearRegression {
+        LinearRegression {
+            beta: None,
+            fit_intercept,
+        }
+    }
+    /* Instead of assert_eq we should probably return a Result; we first need a generic error type shared by all algorithms. */
+    pub fn fit<A, B>(&mut self, X: &ArrayBase<A, Ix2>, Y: &ArrayBase<B, Ix1>)
+    where
+        A: Data<Elem = f64>,
+        B: Data<Elem = f64>,
+    {
+        let (n_samples, _) = X.dim();
+
+        // We have to make sure that the dimensions match
+        assert_eq!(Y.dim(), n_samples);
+
+        self.beta = if self.fit_intercept {
+            let dummy_column: Array<f64, Ix2> = Array::ones((n_samples, 1));
+            /*
+            if X has 2 features and 3 samples
+            X = [[1,2]
+                ,[3,4]
+                ,[5,6]]
+            dummy_column = [[1]
+                           ,[1]
+                           ,[1]]
+            */
+            let X_with_ones = stack(Axis(1), &[dummy_column.view(), X.view()]).unwrap();
+            Some(LinearRegression::fit_beta(&X_with_ones, Y))
+        } else {
+            Some(LinearRegression::fit_beta(X, Y))
+        }
+    }
+    fn fit_beta<A, B>(X: &ArrayBase<A, Ix2>, y: &ArrayBase<B, Ix1>) -> Array1<f64>
+    where
+        A: Data<Elem = f64>,
+        B: Data<Elem = f64>,
+    {
+        let rhs = X.t().dot(y);
+        let linear_operator = X.t().dot(X);
+        linear_operator.solve_into(rhs).unwrap()
+    }
+
+    pub fn predict<A>(&self, X: &ArrayBase<A, Ix2>) -> Array1<f64>
+    where
+        A: Data<Elem = f64>,
+    {
+        let (n_samples, _) = X.dim();
+
+        // If we are fitting the intercept, we need an additional column
+        if self.fit_intercept {
+            let dummy_column: Array<f64, Ix2> = Array::ones((n_samples, 1));
+            let X = stack(Axis(1), &[dummy_column.view(), X.view()]).unwrap();
+            match &self.beta {
+                None => panic!("The linear regression estimator has to be fitted first!"),
+                Some(beta) => X.dot(beta),
+            }
+        } else {
+            match &self.beta {
+                None => panic!("The linear regression estimator has to be fitted first!"),
+                Some(beta) => X.dot(beta),
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use ndarray::array;
+    #[test]
+    fn linear_regression_test() {
+        let mut linear_regression = LinearRegression::new(false);
+        let x = array![[1.0], [2.0], [3.0], [4.0]];
+        let y = array![1.0, 2.0, 3.0, 4.0];
+        linear_regression.fit(&x, &y);
+        let x_hat = array![[6.0]];
+        assert_eq!(linear_regression.predict(&x_hat), array![6.0])
+    }
+}
diff --git a/linfa-supervised/src/linear_regression/mod.rs b/linfa-supervised/src/linear_regression/mod.rs
new file mode 100644
index 000000000..4f6742108
--- /dev/null
+++ b/linfa-supervised/src/linear_regression/mod.rs
@@ -0,0 +1,3 @@
+mod algorithm;
+
+pub use algorithm::*;
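The normal equation in the module comment can be checked by hand on the data from `linear_regression_test`: with x = [1, 2, 3, 4] and y = x, X^T X is the 1x1 matrix [30] and X^T y = [30], so 30 * b = 30 gives b = 1. A standalone sketch of that computation with `ndarray-linalg`'s `Solve` trait (illustrative only, not part of the patch):

```rust
use ndarray::array;
use ndarray_linalg::Solve;

fn main() {
    // Same data as linear_regression_test: y = x, no intercept.
    let x = array![[1.0], [2.0], [3.0], [4.0]];
    let y = array![1.0, 2.0, 3.0, 4.0];

    // Normal equation: (X^T X) b = X^T y, here 30 * b = 30.
    let xtx = x.t().dot(&x);
    let xty = x.t().dot(&y);
    let beta = xtx.solve_into(xty).unwrap();
    assert!((beta[0] - 1.0).abs() < 1e-12);
}
```

Note that forming X^T X squares the condition number of X, so for badly conditioned design matrices a QR or SVD based least-squares solve is numerically preferable to the normal equation.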
diff --git a/linfa-supervised/src/ridge_regression/algorithm.rs b/linfa-supervised/src/ridge_regression/algorithm.rs
new file mode 100644
index 000000000..55ee32f9a
--- /dev/null
+++ b/linfa-supervised/src/ridge_regression/algorithm.rs
@@ -0,0 +1,44 @@
+#![allow(non_snake_case)]
+use ndarray::{Array, Array1, ArrayBase, Data, Ix1, Ix2};
+use ndarray_linalg::Solve;
+/* The difference between linear regression and ridge regression is
+   that ridge regression adds an L2 penalisation term, which keeps some features
+   from "taking all the credit" for the output. It is also a way to deal with over-fitting by adding bias.
+   Some details ...
+   b = (X^T X + aI)^{-1} X^T y with a being the regularisation/penalisation term
+*/
+
+pub struct RidgeRegression {
+    beta: Option<Array1<f64>>,
+    alpha: f64,
+}
+
+impl RidgeRegression {
+    pub fn new(alpha: f64) -> RidgeRegression {
+        RidgeRegression {
+            beta: None,
+            alpha,
+        }
+    }
+
+    pub fn fit<A, B>(&mut self, X: &ArrayBase<A, Ix2>, Y: &ArrayBase<B, Ix1>)
+    where
+        A: Data<Elem = f64>,
+        B: Data<Elem = f64>,
+    {
+        let second_term = X.t().dot(Y);
+        let (_, identity_size) = X.dim();
+        let linear_operator = X.t().dot(X) + self.alpha * Array::eye(identity_size);
+        self.beta = Some(linear_operator.solve_into(second_term).unwrap());
+    }
+
+    pub fn predict<A>(&self, X: &ArrayBase<A, Ix2>) -> Array1<f64>
+    where
+        A: Data<Elem = f64>,
+    {
+        match &self.beta {
+            None => panic!("The ridge regression estimator has to be fitted first!"),
+            Some(beta) => X.dot(beta),
+        }
+    }
+}
diff --git a/linfa-supervised/src/ridge_regression/mod.rs b/linfa-supervised/src/ridge_regression/mod.rs
new file mode 100644
index 000000000..4f6742108
--- /dev/null
+++ b/linfa-supervised/src/ridge_regression/mod.rs
@@ -0,0 +1,3 @@
+mod algorithm;
+
+pub use algorithm::*;
diff --git a/linfa-supervised/src/utils.rs b/linfa-supervised/src/utils.rs
new file mode 100644
index 000000000..2d727f391
--- /dev/null
+++ b/linfa-supervised/src/utils.rs
@@ -0,0 +1,6 @@
+trait GradientDescent {
+    fn gradient();
+
+    fn optimise() {}
+
+}
diff --git a/src/lib.rs b/src/lib.rs
index 40c332e6e..b94b1a324 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -32,3 +32,7 @@
 pub mod clustering {
     pub use linfa_clustering::*;
 }
+
+pub mod supervised {
+    pub use linfa_supervised::*;
+}
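The effect of the ridge penalty can also be verified numerically: on the example data, ridge solves (X^T X + alpha * I) b = X^T y, i.e. (30 + alpha) b = 30, so the coefficient shrinks from 1 towards 0 as alpha grows. A short sketch against this diff's `RidgeRegression` API (the assertions are illustrative and use a floating-point tolerance):

```rust
use linfa_supervised::RidgeRegression;
use ndarray::array;

fn main() {
    let x = array![[1.0], [2.0], [3.0], [4.0]];
    let y = array![1.0, 2.0, 3.0, 4.0];

    // alpha = 0: the penalty vanishes, beta = 30 / 30 = 1, so predict(6) = 6
    // and ridge coincides with the unpenalised linear regression.
    let mut unpenalised = RidgeRegression::new(0.0);
    unpenalised.fit(&x, &y);
    let p0 = unpenalised.predict(&array![[6.0]]);
    assert!((p0[0] - 6.0).abs() < 1e-9);

    // alpha = 1: beta = 30 / 31, so the prediction shrinks below 6.
    let mut penalised = RidgeRegression::new(1.0);
    penalised.fit(&x, &y);
    let p1 = penalised.predict(&array![[6.0]]);
    assert!(p1[0] < 6.0);
    assert!((p1[0] - 6.0 * 30.0 / 31.0).abs() < 1e-9);
}
```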