From 86219ce748e1d11e589818938984615c597a82fa Mon Sep 17 00:00:00 2001 From: mathangpeddi Date: Tue, 1 Oct 2024 20:27:22 -0400 Subject: [PATCH 1/8] Added Catboost Algorithm in Machine Learning --- machine_learning/catboost_classifier.py | 71 +++++++++++++++++++++++++ machine_learning/catboost_regressor.py | 66 +++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 machine_learning/catboost_classifier.py create mode 100644 machine_learning/catboost_regressor.py diff --git a/machine_learning/catboost_classifier.py b/machine_learning/catboost_classifier.py new file mode 100644 index 000000000000..d874f372b42d --- /dev/null +++ b/machine_learning/catboost_classifier.py @@ -0,0 +1,71 @@ +import numpy as np +from matplotlib import pyplot as plt +from sklearn.datasets import load_iris +from sklearn.metrics import ConfusionMatrixDisplay +from sklearn.model_selection import train_test_split +from catboost import CatBoostClassifier + + +def data_handling(data: dict) -> tuple: + # Split dataset into features and target + # data is features + """ + >>> data_handling(({'data':'[5.1, 3.5, 1.4, 0.2]','target':([0])})) + ('[5.1, 3.5, 1.4, 0.2]', [0]) + >>> data_handling( + ... {'data': '[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', 'target': ([0, 0])} + ... ) + ('[4.9, 3.0, 1.4, 0.2], [4.7, 3.2, 1.3, 0.2]', [0, 0]) + """ + return (data["data"], data["target"]) + + +def catboost(features: np.ndarray, target: np.ndarray) -> CatBoostClassifier: + """ + >>> catboost(np.array([[5.1, 3.6, 1.4, 0.2]]), np.array([0])) + + """ + classifier = CatBoostClassifier(verbose=0) + classifier.fit(features, target) + return classifier + + +def main() -> None: + """ + >>> main() + + Url for the algorithm: + https://catboost.ai/ + Iris type dataset is used to demonstrate algorithm. + """ + + # Load Iris dataset + iris = load_iris() + features, targets = data_handling(iris) + x_train, x_test, y_train, y_test = train_test_split( + features, targets, test_size=0.25 + ) + + names = iris["target_names"] + + # Create a CatBoost Classifier from the training data + catboost_classifier = catboost(x_train, y_train) + + # Display the confusion matrix of the classifier with both training and test sets + ConfusionMatrixDisplay.from_estimator( + catboost_classifier, + x_test, + y_test, + display_labels=names, + cmap="Blues", + normalize="true", + ) + plt.title("Normalized Confusion Matrix - IRIS Dataset (CatBoost)") + plt.show() + + +if __name__ == "__main__": + import doctest + + doctest.testmod(verbose=True) + main() diff --git a/machine_learning/catboost_regressor.py b/machine_learning/catboost_regressor.py new file mode 100644 index 000000000000..472cd10cd8c1 --- /dev/null +++ b/machine_learning/catboost_regressor.py @@ -0,0 +1,66 @@ +import numpy as np +from sklearn.datasets import fetch_california_housing +from sklearn.metrics import mean_absolute_error, mean_squared_error +from sklearn.model_selection import train_test_split +from catboost import CatBoostRegressor + + +def data_handling(data: dict) -> tuple: + # Split dataset into features and target. Data is features. + """ + >>> data_handling(( + ... {'data':'[ 8.3252 41. 6.9841269 1.02380952 322. 2.55555556 37.88 -122.23 ]' + ... ,'target':([4.526])})) + ('[ 8.3252 41. 6.9841269 1.02380952 322. 2.55555556 37.88 -122.23 ]', [4.526]) + """ + return (data["data"], data["target"]) + + +def catboost( + features: np.ndarray, target: np.ndarray, test_features: np.ndarray +) -> np.ndarray: + """ + >>> catboost(np.array([[ 2.3571 , 52. , 6.00813008, 1.06775068, + ... 907. , 2.45799458, 40.58 , -124.26]]),np.array([1.114]), + ... np.array([[1.97840000e+00, 3.70000000e+01, 4.98858447e+00, 1.03881279e+00, + ... 1.14300000e+03, 2.60958904e+00, 3.67800000e+01, -1.19780000e+02]])) + array([1.1139996]) + """ + # Create and fit the CatBoost Regressor + catboost_model = CatBoostRegressor( + verbose=0, random_seed=42, loss_function="RMSE" + ) + catboost_model.fit(features, target) + # Predict target for test data + predictions = catboost_model.predict(test_features) + predictions = predictions.reshape(len(predictions), 1) + return predictions + + +def main() -> None: + """ + The URL for this algorithm: + https://catboost.ai/ + California house price dataset is used to demonstrate the algorithm. + + Expected error values: + Mean Absolute Error: 0.30957163379906033 + Mean Square Error: 0.22611560196662744 + """ + # Load California house price dataset + california = fetch_california_housing() + data, target = data_handling(california) + x_train, x_test, y_train, y_test = train_test_split( + data, target, test_size=0.25, random_state=1 + ) + predictions = catboost(x_train, y_train, x_test) + # Error printing + print(f"Mean Absolute Error: {mean_absolute_error(y_test, predictions)}") + print(f"Mean Square Error: {mean_squared_error(y_test, predictions)}") + + +if __name__ == "__main__": + import doctest + + doctest.testmod(verbose=True) + main() From dca7ae588bb342ab896fbe8f5ea816bee6fbb143 Mon Sep 17 00:00:00 2001 From: mathangpeddi Date: Tue, 1 Oct 2024 20:30:16 -0400 Subject: [PATCH 2/8] Added first line --- machine_learning/catboost_classifier.py | 1 + machine_learning/catboost_regressor.py | 1 + 2 files changed, 2 insertions(+) diff --git a/machine_learning/catboost_classifier.py b/machine_learning/catboost_classifier.py index d874f372b42d..97d7a792b9c0 100644 --- a/machine_learning/catboost_classifier.py +++ b/machine_learning/catboost_classifier.py @@ -1,3 +1,4 @@ +# Catboost Classifier Example import numpy as np from matplotlib import pyplot as plt from sklearn.datasets import load_iris diff --git a/machine_learning/catboost_regressor.py b/machine_learning/catboost_regressor.py index 472cd10cd8c1..d3f5ab80df14 100644 --- a/machine_learning/catboost_regressor.py +++ b/machine_learning/catboost_regressor.py @@ -1,3 +1,4 @@ +# Catboost Regressor Example import numpy as np from sklearn.datasets import fetch_california_housing from sklearn.metrics import mean_absolute_error, mean_squared_error From a5d81129f96d9df5dfefb9c7f0bea02533218a36 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Oct 2024 00:30:24 +0000 Subject: [PATCH 3/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/catboost_regressor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/machine_learning/catboost_regressor.py b/machine_learning/catboost_regressor.py index 472cd10cd8c1..1ac4ba78753c 100644 --- a/machine_learning/catboost_regressor.py +++ b/machine_learning/catboost_regressor.py @@ -27,9 +27,7 @@ def catboost( array([1.1139996]) """ # Create and fit the CatBoost Regressor - catboost_model = CatBoostRegressor( - verbose=0, random_seed=42, loss_function="RMSE" - ) + catboost_model = CatBoostRegressor(verbose=0, random_seed=42, loss_function="RMSE") catboost_model.fit(features, target) # Predict target for test data predictions = catboost_model.predict(test_features) From 23f5e8f9374cac9daecf761ed52cda966622bee6 Mon Sep 17 00:00:00 2001 From: mathangpeddi Date: Tue, 1 Oct 2024 20:48:32 -0400 Subject: [PATCH 4/8] Added to requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index afbf25ba6edc..27dff1d4712d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,4 @@ tweepy # yulewalker # uncomment once audio_filters/equal_loudness_filter.py is fixed typing_extensions xgboost +catboost From 73475dcc818b7147c85a666f699cbfe8bc819bd8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Oct 2024 00:49:03 +0000 Subject: [PATCH 5/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 27dff1d4712d..d9fa2ecb8b5e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ beautifulsoup4 +catboost fake_useragent imageio keras @@ -22,4 +23,3 @@ tweepy # yulewalker # uncomment once audio_filters/equal_loudness_filter.py is fixed typing_extensions xgboost -catboost From 002bbd83a57dd0416a6f8d87fd65014890fd5196 Mon Sep 17 00:00:00 2001 From: mathangpeddi Date: Tue, 1 Oct 2024 20:55:51 -0400 Subject: [PATCH 6/8] Fixed lint errors --- machine_learning/catboost_classifier.py | 2 +- machine_learning/catboost_regressor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/machine_learning/catboost_classifier.py b/machine_learning/catboost_classifier.py index 97d7a792b9c0..06dd88a299bd 100644 --- a/machine_learning/catboost_classifier.py +++ b/machine_learning/catboost_classifier.py @@ -1,10 +1,10 @@ # Catboost Classifier Example import numpy as np +from catboost import CatBoostClassifier from matplotlib import pyplot as plt from sklearn.datasets import load_iris from sklearn.metrics import ConfusionMatrixDisplay from sklearn.model_selection import train_test_split -from catboost import CatBoostClassifier def data_handling(data: dict) -> tuple: diff --git a/machine_learning/catboost_regressor.py b/machine_learning/catboost_regressor.py index c1c9fe1184a5..35eb409cb00b 100644 --- a/machine_learning/catboost_regressor.py +++ b/machine_learning/catboost_regressor.py @@ -1,9 +1,9 @@ # Catboost Regressor Example import numpy as np +from catboost import CatBoostRegressor from sklearn.datasets import fetch_california_housing from sklearn.metrics import mean_absolute_error, mean_squared_error from sklearn.model_selection import train_test_split -from catboost import CatBoostRegressor def data_handling(data: dict) -> tuple: From 073c62aba8f7b4d6b6791ae0f84ab998e1290c79 Mon Sep 17 00:00:00 2001 From: mathangpeddi Date: Tue, 1 Oct 2024 20:56:34 -0400 Subject: [PATCH 7/8] Change requirements file --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index d9fa2ecb8b5e..32a32b52e129 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,3 +23,4 @@ tweepy # yulewalker # uncomment once audio_filters/equal_loudness_filter.py is fixed typing_extensions xgboost +catboost \ No newline at end of file From 5272789af012ad82b68c14996c98ddb49f59dff8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Oct 2024 00:57:15 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 32a32b52e129..d9fa2ecb8b5e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,4 +23,3 @@ tweepy # yulewalker # uncomment once audio_filters/equal_loudness_filter.py is fixed typing_extensions xgboost -catboost \ No newline at end of file