diff --git a/InsurAutoML/VERSION b/InsurAutoML/VERSION
index 7179039..abd4105 100644
--- a/InsurAutoML/VERSION
+++ b/InsurAutoML/VERSION
@@ -1 +1 @@
-0.2.3
+0.2.4
diff --git a/InsurAutoML/hpo/base.py b/InsurAutoML/hpo/base.py
index fac7d6c..06a7d63 100644
--- a/InsurAutoML/hpo/base.py
+++ b/InsurAutoML/hpo/base.py
@@ -5,13 +5,13 @@
Mathematics Department, University of Illinois at Urbana-Champaign (UIUC)
Project: InsurAutoML
-Latest Version: 0.2.3
+Latest Version: 0.2.4
Relative Path: /InsurAutoML/hpo/base.py
File: _base.py
Author: Panyi Dong (panyid2@illinois.edu)
-----
-Last Modified: Monday, 28th November 2022 11:36:14 pm
+Last Modified: Friday, 3rd February 2023 12:32:28 am
Modified By: Panyi Dong (panyid2@illinois.edu)
-----
@@ -121,7 +121,7 @@
device_count = 0
-class AutoTabularBase(MetaData):
+class AutoTabularBase:
""" "
Base class module for AutoTabular (for classification and regression tasks)
@@ -330,7 +330,11 @@ def get_hyperparameter_space(
# Encoding: convert string types to numerical type
# all encoders available
from InsurAutoML.encoding import encoders
- from additional import add_encoders
+ # `additional` is an optional user module: default add_encoders to {} if its import fails
+ try :
+ from additional import add_encoders
+ except:
+ add_encoders = {}
# include original encoders
self._all_encoders = copy.deepcopy(encoders)
@@ -361,7 +365,11 @@ def get_hyperparameter_space(
# Imputer: fill missing values
# all imputers available
from InsurAutoML.imputation import imputers
- from additional import add_imputers
+ # `additional` is an optional user module: default add_imputers to {} if its import fails
+ try :
+ from additional import add_imputers
+ except :
+ add_imputers = {}
# include original imputers
self._all_imputers = copy.deepcopy(imputers)
@@ -401,7 +409,11 @@ def get_hyperparameter_space(
# Balancing: deal with imbalanced dataset, using over-/under-sampling methods
# all balancings available
from InsurAutoML.balancing import balancings
- from additional import add_balancings
+ # `additional` is an optional user module: default add_balancings to {} if its import fails
+ try :
+ from additional import add_balancings
+ except :
+ add_balancings = {}
# include original balancings
self._all_balancings = copy.deepcopy(balancings)
@@ -426,7 +438,11 @@ def get_hyperparameter_space(
# Scaling
# all scalings available
from InsurAutoML.scaling import scalings
- from additional import add_scalings
+ # `additional` is an optional user module: default add_scalings to {} if its import fails
+ try :
+ from additional import add_scalings
+ except :
+ add_scalings = {}
# include original scalings
self._all_scalings = copy.deepcopy(scalings)
@@ -451,7 +467,11 @@ def get_hyperparameter_space(
# Feature selection: Remove redundant features, reduce dimensionality
# all feature selections available
from InsurAutoML.feature_selection import feature_selections
- from additional import add_feature_selections
+ # `additional` is an optional user module: default add_feature_selections to {} if its import fails
+ try :
+ from additional import add_feature_selections
+ except :
+ add_feature_selections = {}
# include original feature selections
self._all_feature_selection = copy.deepcopy(feature_selections)
@@ -502,7 +522,11 @@ def get_hyperparameter_space(
# if mode is regression, use regression models
if self.task_mode == "classification":
from InsurAutoML.model import classifiers
- from additional import add_classifiers
+ # `additional` is an optional user module: default add_classifiers to {} if its import fails
+ try :
+ from additional import add_classifiers
+ except :
+ add_classifiers = {}
# include original classifiers
self._all_models = copy.deepcopy(classifiers)
@@ -510,7 +534,11 @@ def get_hyperparameter_space(
self._all_models.update(add_classifiers)
elif self.task_mode == "regression":
from InsurAutoML.model import regressors
- from additional import add_regressors
+ # `additional` is an optional user module: default add_regressors to {} if its import fails
+ try :
+ from additional import add_regressors
+ except :
+ add_regressors = {}
# include original regressors
self._all_models = copy.deepcopy(regressors)
@@ -560,15 +588,25 @@ def get_hyperparameter_space(
regressor_hyperparameter,
)
- from additional import (
- add_encoder_hyperparameter,
- add_imputer_hyperparameter,
- add_scaling_hyperparameter,
- add_balancing_hyperparameter,
- add_feature_selection_hyperparameter,
- add_classifier_hyperparameter,
- add_regressor_hyperparameter,
- )
+ # `additional` is an optional user module: default every add_*_hyperparameter to {} if its import fails
+ try :
+ from additional import (
+ add_encoder_hyperparameter,
+ add_imputer_hyperparameter,
+ add_scaling_hyperparameter,
+ add_balancing_hyperparameter,
+ add_feature_selection_hyperparameter,
+ add_classifier_hyperparameter,
+ add_regressor_hyperparameter,
+ )
+ except :
+ add_encoder_hyperparameter = {}
+ add_imputer_hyperparameter = {}
+ add_scaling_hyperparameter = {}
+ add_balancing_hyperparameter = {}
+ add_feature_selection_hyperparameter = {}
+ add_classifier_hyperparameter = {}
+ add_regressor_hyperparameter = {}
# if needed, modify default hyperparameter space
# like model hyperparameter space below
@@ -1188,7 +1226,8 @@ def fit(
type(y)))
# get data metadata
- super(AutoTabularBase, self).__init__(X)
+ if not hasattr(self, "metadata") :
+ self.metadata = MetaData(X).metadata
# check if there's unsupported data type
# if datetime ,recommend to remove
if ("Datetime", "") in self.metadata.keys():
diff --git a/doc/Automated Machine Learning (AutoML) in Insurance.pdf b/doc/Automated Machine Learning (AutoML) in Insurance.pdf
index c37ee88..d214540 100644
Binary files a/doc/Automated Machine Learning (AutoML) in Insurance.pdf and b/doc/Automated Machine Learning (AutoML) in Insurance.pdf differ
diff --git a/example/Heart Failure Prediction.ipynb b/example/Heart Failure Prediction.ipynb
index da4ad9a..b3fca6f 100644
--- a/example/Heart Failure Prediction.ipynb
+++ b/example/Heart Failure Prediction.ipynb
@@ -2,13 +2,13 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import InsurAutoML\n",
"from InsurAutoML import load_data, AutoTabular\n",
- "from InsurAutoML._utils import train_test_split"
+ "from InsurAutoML.utils import train_test_split"
]
},
{
@@ -21,7 +21,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -32,152 +32,16 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Age | \n",
- " Sex | \n",
- " ChestPainType | \n",
- " RestingBP | \n",
- " Cholesterol | \n",
- " FastingBS | \n",
- " RestingECG | \n",
- " MaxHR | \n",
- " ExerciseAngina | \n",
- " Oldpeak | \n",
- " ST_Slope | \n",
- " HeartDisease | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 40 | \n",
- " M | \n",
- " ATA | \n",
- " 140 | \n",
- " 289 | \n",
- " 0 | \n",
- " Normal | \n",
- " 172 | \n",
- " N | \n",
- " 0.0 | \n",
- " Up | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 49 | \n",
- " F | \n",
- " NAP | \n",
- " 160 | \n",
- " 180 | \n",
- " 0 | \n",
- " Normal | \n",
- " 156 | \n",
- " N | \n",
- " 1.0 | \n",
- " Flat | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 37 | \n",
- " M | \n",
- " ATA | \n",
- " 130 | \n",
- " 283 | \n",
- " 0 | \n",
- " ST | \n",
- " 98 | \n",
- " N | \n",
- " 0.0 | \n",
- " Up | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 48 | \n",
- " F | \n",
- " ASY | \n",
- " 138 | \n",
- " 214 | \n",
- " 0 | \n",
- " Normal | \n",
- " 108 | \n",
- " Y | \n",
- " 1.5 | \n",
- " Flat | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 54 | \n",
- " M | \n",
- " NAP | \n",
- " 150 | \n",
- " 195 | \n",
- " 0 | \n",
- " Normal | \n",
- " 122 | \n",
- " N | \n",
- " 0.0 | \n",
- " Up | \n",
- " 0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Age Sex ChestPainType RestingBP Cholesterol FastingBS RestingECG MaxHR \\\n",
- "0 40 M ATA 140 289 0 Normal 172 \n",
- "1 49 F NAP 160 180 0 Normal 156 \n",
- "2 37 M ATA 130 283 0 ST 98 \n",
- "3 48 F ASY 138 214 0 Normal 108 \n",
- "4 54 M NAP 150 195 0 Normal 122 \n",
- "\n",
- " ExerciseAngina Oldpeak ST_Slope HeartDisease \n",
- "0 N 0.0 Up 0 \n",
- "1 N 1.0 Flat 1 \n",
- "2 N 0.0 Up 0 \n",
- "3 Y 1.5 Flat 1 \n",
- "4 N 0.0 Up 0 "
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"database['heart'].head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -189,7 +53,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -201,20 +65,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# fit AutoML model\n",
"mol = AutoTabular(seed = 1)\n",
@@ -223,20 +76,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.8686131386861314"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"from sklearn.metrics import accuracy_score\n",
"y_pred = mol.predict(test_X)\n",
@@ -245,11 +87,8 @@
}
],
"metadata": {
- "interpreter": {
- "hash": "ffd44db24f7f67a4f281ba15796957bca8144b3dee6ef1a3a74893a61c2c4db7"
- },
"kernelspec": {
- "display_name": "Python 3.8.8 64-bit ('base': conda)",
+ "display_name": "Python 3.8.10 64-bit",
"language": "python",
"name": "python3"
},
@@ -263,9 +102,14 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.7"
+ "version": "3.8.10"
},
- "orig_nbformat": 4
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
+ }
+ }
},
"nbformat": 4,
"nbformat_minor": 2
diff --git a/example/Insurance Premium Prediction.ipynb b/example/Insurance Premium Prediction.ipynb
index 542de11..db7afb4 100644
--- a/example/Insurance Premium Prediction.ipynb
+++ b/example/Insurance Premium Prediction.ipynb
@@ -2,13 +2,13 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import InsurAutoML\n",
"from InsurAutoML import load_data, AutoTabular\n",
- "from InsurAutoML._utils import train_test_split"
+ "from InsurAutoML.utils import train_test_split"
]
},
{
@@ -21,7 +21,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -32,115 +32,16 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " age | \n",
- " sex | \n",
- " bmi | \n",
- " children | \n",
- " smoker | \n",
- " region | \n",
- " expenses | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 19 | \n",
- " female | \n",
- " 27.9 | \n",
- " 0 | \n",
- " yes | \n",
- " southwest | \n",
- " 16884.92 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 18 | \n",
- " male | \n",
- " 33.8 | \n",
- " 1 | \n",
- " no | \n",
- " southeast | \n",
- " 1725.55 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 28 | \n",
- " male | \n",
- " 33.0 | \n",
- " 3 | \n",
- " no | \n",
- " southeast | \n",
- " 4449.46 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 33 | \n",
- " male | \n",
- " 22.7 | \n",
- " 0 | \n",
- " no | \n",
- " northwest | \n",
- " 21984.47 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 32 | \n",
- " male | \n",
- " 28.9 | \n",
- " 0 | \n",
- " no | \n",
- " northwest | \n",
- " 3866.86 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " age sex bmi children smoker region expenses\n",
- "0 19 female 27.9 0 yes southwest 16884.92\n",
- "1 18 male 33.8 1 no southeast 1725.55\n",
- "2 28 male 33.0 3 no southeast 4449.46\n",
- "3 33 male 22.7 0 no northwest 21984.47\n",
- "4 32 male 28.9 0 no northwest 3866.86"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"database['insurance'].head(5)"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -152,7 +53,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -164,20 +65,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# fit AutoML model\n",
"mol = AutoTabular(seed = 1)\n",
@@ -186,20 +76,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "21309279.613129355"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# predict using AutoML model\n",
"from sklearn.metrics import mean_squared_error\n",
@@ -209,22 +88,9 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- "