From 4f2ccfd581eabc3f3825dd31d66540a45ad06931 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Tue, 6 Feb 2024 00:49:17 +0100 Subject: [PATCH] add notebook --- ...i_20240206_tuning_BCN_classifier_Pt2.ipynb | 223 ++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100644 GPopt/demo/thierrymoudiki_20240206_tuning_BCN_classifier_Pt2.ipynb diff --git a/GPopt/demo/thierrymoudiki_20240206_tuning_BCN_classifier_Pt2.ipynb b/GPopt/demo/thierrymoudiki_20240206_tuning_BCN_classifier_Pt2.ipynb new file mode 100644 index 0000000..2ba0c7d --- /dev/null +++ b/GPopt/demo/thierrymoudiki_20240206_tuning_BCN_classifier_Pt2.ipynb @@ -0,0 +1,223 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ver1MWFiAIH1" + }, + "source": [ + "# 1 - Install" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip uninstall -y BCN GPopt " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "L7dr5u5k8hW9", + "outputId": "f26bd34b-d73b-4fee-b97a-3df46a0d8018" + }, + "outputs": [], + "source": [ + "!pip install BCN --upgrade --no-cache-dir" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IBA5fko_BA4V", + "outputId": "122f3221-2fb1-409e-c4e8-17d9d1d8709c" + }, + "outputs": [], + "source": [ + "!pip install GPopt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FV7pttAu8pXP" + }, + "outputs": [], + "source": [ + "import BCN as bcn # takes a long time to run, ONLY the first time it's run\n", + "import GPopt as gp\n", + "import numpy as np\n", + "\n", + "from sklearn.gaussian_process import GaussianProcessRegressor\n", + "from sklearn.gaussian_process.kernels import Matern\n", + "from sklearn.datasets import load_iris, load_wine, load_breast_cancer\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn import metrics\n", + "from time import time" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YniHGO9AAac2" + }, + "source": [ + "# 2 - cv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LIGGEBbxCznR" + }, + "outputs": [], + "source": [ + "from sklearn.model_selection import cross_val_score" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fIgPu1vBAd7X" + }, + "outputs": [], + "source": [ + "def bcn_cv(X_train, y_train,\n", + " B = 10, nu = 0.335855,\n", + " lam = 10**0.7837525,\n", + " r = 1 - 10**(-5.470031),\n", + " tol = 10**-7,\n", + " col_sample=1,\n", + " n_clusters = 3):\n", + "\n", + " estimator = bcn.BCNClassifier(B = int(B),\n", + " nu = nu,\n", + " lam = lam,\n", + " r = r,\n", + " tol = tol,\n", + " col_sample = col_sample,\n", + " n_clusters = n_clusters,\n", + " activation=\"tanh\",\n", + " type_optim=\"nlminb\",\n", + " show_progress = False)\n", + "\n", + " return -cross_val_score(estimator, X_train, y_train,\n", + " scoring='accuracy',\n", + " cv=5, n_jobs=None,\n", + " verbose=0).mean()\n", + "\n", + "def optimize_bcn(X_train, y_train):\n", + " # objective function for hyperparams tuning\n", + " def crossval_objective(x):\n", + " return bcn_cv(X_train=X_train,\n", + " y_train=y_train,\n", + " B = int(x[0]),\n", + " nu = 10**x[1],\n", + " lam = 10**x[2],\n", + " r = 1 - 10**x[3],\n", + " tol = 10**x[4],\n", + " col_sample = np.ceil(x[5]),\n", + " n_clusters = np.ceil(x[6]))\n", + " gp_opt = gp.GPOpt(objective_func=crossval_objective,\n", + " lower_bound = np.array([ 3, -6, -10, -10, -6, 0.8, 1]),\n", + " upper_bound = np.array([ 100, -0.1, 10, -1, -0.1, 1, 4]),\n", + " params_names=[\"B\", \"nu\", \"lam\", \"r\", \"tol\", \"col_sample\", \"n_clusters\"],\n", + " gp_obj = GaussianProcessRegressor( # this is where the Gaussian Process can be chosen\n", + " kernel=Matern(nu=1.5),\n", + " alpha=1e-6,\n", + " normalize_y=True,\n", + " n_restarts_optimizer=25,\n", + " random_state=42,\n", + " ),\n", + " n_init=10, n_iter=190, seed=3137)\n", + " return gp_opt.optimize(verbose=2, abs_tol=1e-3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dU2gF9rYC_Q_", + "outputId": "600a5854-6c32-4603-d8b4-d4c0d4a44263" + }, + "outputs": [], + "source": [ + "dataset = load_wine()\n", + "X = dataset.data\n", + "y = dataset.target\n", + "\n", + "# split data into training test and test set\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y,\n", + " test_size=0.2, random_state=3137)\n", + "\n", + "# hyperparams tuning\n", + "res_opt = optimize_bcn(X_train, y_train)\n", + "print(res_opt)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zo4LTC_PDEGp" + }, + "outputs": [], + "source": [ + "start = time()\n", + "estimator = bcn.BCNClassifier(B=int(parameters[0][0]),\n", + " nu=10**parameters[0][1],\n", + " lam=10**parameters[0][2],\n", + " r=1-10**parameters[0][3],\n", + " tol=10**parameters[0][4],\n", + " col_sample=np.ceil(parameters[0][5]),\n", + " n_clusters=np.ceil(parameters[0][6]),\n", + " activation=\"tanh\",\n", + " type_optim=\"nlminb\").fit(X_train, y_train)\n", + "print(f\"\\n Elapsed: {time() - start}\")\n", + "start = time()\n", + "print(f\"\\n\\n Test set accuracy: {estimator.score(X_test, y_test)}\")\n", + "print(f\"\\n Elapsed: {time() - start}\")" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}