From ce540c7d20e58d42d51019d516fb40771ece498f Mon Sep 17 00:00:00 2001
From: Sanket Nikam <sanketn734@gmail.com>
Date: Wed, 25 Oct 2023 18:06:58 +0530
Subject: [PATCH 1/7] Added Gradient Boosting Classifier

---
 .../gradient_boosting_classifier.py           | 95 +++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 machine_learning/gradient_boosting_classifier.py

diff --git a/machine_learning/gradient_boosting_classifier.py b/machine_learning/gradient_boosting_classifier.py
new file mode 100644
index 000000000000..2776f717d0a4
--- /dev/null
+++ b/machine_learning/gradient_boosting_classifier.py
@@ -0,0 +1,95 @@
+
+import numpy as np
+from sklearn.datasets import load_iris
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeRegressor
+
+
+class GradientBoostingClassifier:
+    def __init__(self, n_estimators: int = 100, learning_rate: float = 0.1) -> None:
+        """
+        Initialize a GradientBoostingClassifier.
+
+        Parameters:
+        - n_estimators (int): The number of weak learners to train.
+        - learning_rate (float): The learning rate for updating the model.
+
+        Attributes:
+        - n_estimators (int): The number of weak learners.
+        - learning_rate (float): The learning rate.
+        - models (list): A list to store the trained weak learners.
+        """
+        self.n_estimators = n_estimators
+        self.learning_rate = learning_rate
+        self.models: list[tuple[DecisionTreeRegressor, float]] = []
+
+    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
+        """
+        Fit the GradientBoostingClassifier to the training data.
+
+        Parameters:
+        - x (np.ndarray): The training features.
+        - y (np.ndarray): The target values.
+
+        Returns:
+        None
+        """
+        for _ in range(self.n_estimators):
+            # Calculate the pseudo-residuals
+            residuals = -self.gradient(y, self.predict(x))
+            # Fit a weak learner (e.g., decision tree) to the residuals
+            model = DecisionTreeRegressor(max_depth=1)
+            model.fit(x, residuals)
+            # Update the model by adding the weak learner with a learning rate
+            self.models.append((model, self.learning_rate))
+
+    def predict(self, x: np.ndarray) -> np.ndarray:
+        """
+        Make predictions on input data.
+
+        Parameters:
+        - x (np.ndarray): The input data for making predictions.
+
+        Returns:
+        - np.ndarray: An array of binary predictions (-1 or 1).
+        """
+        # Initialize predictions with zeros
+        predictions = np.zeros(x.shape[0])
+        for model, learning_rate in self.models:
+            predictions += learning_rate * model.predict(x)
+        return np.sign(predictions)  # Convert to binary predictions (-1 or 1)
+
+    def gradient(self, y: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
+        """
+        Calculate the negative gradient (pseudo-residuals) for logistic loss.
+
+        Parameters:
+        - y (np.ndarray): The target values.
+        - y_pred (np.ndarray): The predicted values.
+
+        Returns:
+        - np.ndarray: An array of pseudo-residuals.
+        """
+        return -y / (1 + np.exp(y * y_pred))
+
+
+if __name__ == "__main__":
+    iris = load_iris()
+    X, y = iris.data, iris.target
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
+    clf.fit(X_train, y_train)
+
+    y_pred = clf.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    print(f"Accuracy: {accuracy:.2f}")
+
+# Perform some calculations in doctests
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()

From 171a42c780a766ec8d14c30fa18021fad7e31d5d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 25 Oct 2023 12:41:16 +0000
Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/gradient_boosting_classifier.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/machine_learning/gradient_boosting_classifier.py b/machine_learning/gradient_boosting_classifier.py
index 2776f717d0a4..92da6b159b30 100644
--- a/machine_learning/gradient_boosting_classifier.py
+++ b/machine_learning/gradient_boosting_classifier.py
@@ -1,4 +1,3 @@
-
 import numpy as np
 from sklearn.datasets import load_iris
 from sklearn.metrics import accuracy_score

From ec6ed8cb4c097f423fdad8e0e78473536de85341 Mon Sep 17 00:00:00 2001
From: Sanket Nikam <77570082+SannketNikam@users.noreply.github.com>
Date: Wed, 25 Oct 2023 18:31:27 +0530
Subject: [PATCH 3/7] Use lowercase x/x_train/x_test in the demo block

---
 machine_learning/gradient_boosting_classifier.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/machine_learning/gradient_boosting_classifier.py b/machine_learning/gradient_boosting_classifier.py
index 92da6b159b30..92652d5fdc27 100644
--- a/machine_learning/gradient_boosting_classifier.py
+++ b/machine_learning/gradient_boosting_classifier.py
@@ -75,15 +75,15 @@ def gradient(self, y: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
 
 if __name__ == "__main__":
     iris = load_iris()
-    X, y = iris.data, iris.target
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.2, random_state=42
+    x, y = iris.data, iris.target
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=0.2, random_state=42
     )
 
     clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
-    clf.fit(X_train, y_train)
+    clf.fit(x_train, y_train)
 
-    y_pred = clf.predict(X_test)
+    y_pred = clf.predict(x_test)
     accuracy = accuracy_score(y_test, y_pred)
     print(f"Accuracy: {accuracy:.2f}")
 

From 0cbc5a51d968888e1f025673f35530b93dcd2e1c Mon Sep 17 00:00:00 2001
From: Sanket Nikam <77570082+SannketNikam@users.noreply.github.com>
Date: Wed, 25 Oct 2023 18:40:33 +0530
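Subject: [PATCH 4/7] Rename fit/predict/gradient params and add doctests

Rename the x/y parameters of fit, predict and gradient to
features/target and add a doctest to each method. Also drop the type
annotation on self.models, switch the demo variables back to
X/X_train/X_test, and remove the trailing doctest.testmod() block.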

---
 .../gradient_boosting_classifier.py           | 72 ++++++++++++-------
 1 file changed, 48 insertions(+), 24 deletions(-)

diff --git a/machine_learning/gradient_boosting_classifier.py b/machine_learning/gradient_boosting_classifier.py
index 92652d5fdc27..55e3943be7d4 100644
--- a/machine_learning/gradient_boosting_classifier.py
+++ b/machine_learning/gradient_boosting_classifier.py
@@ -21,74 +21,98 @@ def __init__(self, n_estimators: int = 100, learning_rate: float = 0.1) -> None:
         """
         self.n_estimators = n_estimators
         self.learning_rate = learning_rate
-        self.models: list[tuple[DecisionTreeRegressor, float]] = []
+        self.models = []
 
-    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
+    def fit(self, features: np.ndarray, target: np.ndarray) -> None:
         """
         Fit the GradientBoostingClassifier to the training data.
 
         Parameters:
-        - x (np.ndarray): The training features.
-        - y (np.ndarray): The target values.
+        - features (np.ndarray): The training features.
+        - target (np.ndarray): The target values.
 
         Returns:
         None
+
+        >>> import numpy as np
+        >>> from sklearn.datasets import load_iris
+        >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
+        >>> iris = load_iris()
+        >>> X, y = iris.data, iris.target
+        >>> clf.fit(X, y)
+        >>> # Check if the model is trained
+        >>> len(clf.models) == 100
+        True
         """
         for _ in range(self.n_estimators):
             # Calculate the pseudo-residuals
-            residuals = -self.gradient(y, self.predict(x))
+            residuals = -self.gradient(target, self.predict(features))
             # Fit a weak learner (e.g., decision tree) to the residuals
             model = DecisionTreeRegressor(max_depth=1)
-            model.fit(x, residuals)
+            model.fit(features, residuals)
             # Update the model by adding the weak learner with a learning rate
             self.models.append((model, self.learning_rate))
 
-    def predict(self, x: np.ndarray) -> np.ndarray:
+    def predict(self, features: np.ndarray) -> np.ndarray:
         """
         Make predictions on input data.
 
         Parameters:
-        - x (np.ndarray): The input data for making predictions.
+        - features (np.ndarray): The input data for making predictions.
 
         Returns:
         - np.ndarray: An array of binary predictions (-1 or 1).
+
+        >>> import numpy as np
+        >>> from sklearn.datasets import load_iris
+        >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
+        >>> iris = load_iris()
+        >>> X, y = iris.data, iris.target
+        >>> clf.fit(X, y)
+        >>> y_pred = clf.predict(X)
+        >>> # Check if the predictions have the correct shape
+        >>> y_pred.shape == y.shape
+        True
         """
         # Initialize predictions with zeros
-        predictions = np.zeros(x.shape[0])
+        predictions = np.zeros(features.shape[0])
         for model, learning_rate in self.models:
-            predictions += learning_rate * model.predict(x)
+            predictions += learning_rate * model.predict(features)
         return np.sign(predictions)  # Convert to binary predictions (-1 or 1)
 
-    def gradient(self, y: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
+    def gradient(self, target: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
         """
         Calculate the negative gradient (pseudo-residuals) for logistic loss.
 
         Parameters:
-        - y (np.ndarray): The target values.
+        - target (np.ndarray): The target values.
         - y_pred (np.ndarray): The predicted values.
 
         Returns:
         - np.ndarray: An array of pseudo-residuals.
+
+        >>> import numpy as np
+        >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
+        >>> target = np.array([0, 1, 0, 1])
+        >>> y_pred = np.array([0.2, 0.8, 0.3, 0.7])
+        >>> residuals = clf.gradient(target, y_pred)
+        >>> # Check if residuals have the correct shape
+        >>> residuals.shape == target.shape
+        True
         """
-        return -y / (1 + np.exp(y * y_pred))
+        return -target / (1 + np.exp(target * y_pred))
 
 
 if __name__ == "__main__":
     iris = load_iris()
-    x, y = iris.data, iris.target
-    x_train, x_test, y_train, y_test = train_test_split(
-        x, y, test_size=0.2, random_state=42
+    X, y = iris.data, iris.target
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
     )
 
     clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
-    clf.fit(x_train, y_train)
+    clf.fit(X_train, y_train)
 
-    y_pred = clf.predict(x_test)
+    y_pred = clf.predict(X_test)
     accuracy = accuracy_score(y_test, y_pred)
     print(f"Accuracy: {accuracy:.2f}")
-
-# Perform some calculations in doctests
-if __name__ == "__main__":
-    import doctest
-
-    doctest.testmod()

From 6661e12931335508402bc762ae3814adf5a117fd Mon Sep 17 00:00:00 2001
From: Sanket Nikam <77570082+SannketNikam@users.noreply.github.com>
Date: Wed, 25 Oct 2023 18:55:27 +0530
Subject: [PATCH 5/7] Annotate self.models using typing.List and typing.Tuple

---
 machine_learning/gradient_boosting_classifier.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/machine_learning/gradient_boosting_classifier.py b/machine_learning/gradient_boosting_classifier.py
index 55e3943be7d4..c54ceb3eef0a 100644
--- a/machine_learning/gradient_boosting_classifier.py
+++ b/machine_learning/gradient_boosting_classifier.py
@@ -3,6 +3,7 @@
 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
 from sklearn.tree import DecisionTreeRegressor
+from typing import List, Tuple
 
 
 class GradientBoostingClassifier:
@@ -21,7 +22,7 @@ def __init__(self, n_estimators: int = 100, learning_rate: float = 0.1) -> None:
         """
         self.n_estimators = n_estimators
         self.learning_rate = learning_rate
-        self.models = []
+        self.models: List[Tuple[DecisionTreeRegressor, float]] = []
 
     def fit(self, features: np.ndarray, target: np.ndarray) -> None:
         """

From 63bcd54f4d4d5dfb3a382abae478cf39c73d5094 Mon Sep 17 00:00:00 2001
From: Sanket Nikam <77570082+SannketNikam@users.noreply.github.com>
Date: Wed, 25 Oct 2023 19:01:59 +0530
Subject: [PATCH 6/7] Use builtin list/tuple generics and drop typing import

---
 machine_learning/gradient_boosting_classifier.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/machine_learning/gradient_boosting_classifier.py b/machine_learning/gradient_boosting_classifier.py
index c54ceb3eef0a..70967b541002 100644
--- a/machine_learning/gradient_boosting_classifier.py
+++ b/machine_learning/gradient_boosting_classifier.py
@@ -3,8 +3,6 @@
 from sklearn.metrics import accuracy_score
 from sklearn.model_selection import train_test_split
 from sklearn.tree import DecisionTreeRegressor
-from typing import List, Tuple
-
 
 class GradientBoostingClassifier:
     def __init__(self, n_estimators: int = 100, learning_rate: float = 0.1) -> None:
@@ -22,7 +20,7 @@ def __init__(self, n_estimators: int = 100, learning_rate: float = 0.1) -> None:
         """
         self.n_estimators = n_estimators
         self.learning_rate = learning_rate
-        self.models: List[Tuple[DecisionTreeRegressor, float]] = []
+        self.models: list[tuple[DecisionTreeRegressor, float]] = []
 
     def fit(self, features: np.ndarray, target: np.ndarray) -> None:
         """

From 3af39b5ee75df7439dd936212d9077bde9d9cf96 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 25 Oct 2023 13:32:43 +0000
Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/gradient_boosting_classifier.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/machine_learning/gradient_boosting_classifier.py b/machine_learning/gradient_boosting_classifier.py
index 70967b541002..2902394d8226 100644
--- a/machine_learning/gradient_boosting_classifier.py
+++ b/machine_learning/gradient_boosting_classifier.py
@@ -4,6 +4,7 @@
 from sklearn.model_selection import train_test_split
 from sklearn.tree import DecisionTreeRegressor
 
+
 class GradientBoostingClassifier:
     def __init__(self, n_estimators: int = 100, learning_rate: float = 0.1) -> None:
         """