Skip to content

Commit

Permalink
Merge pull request #15 from PanyiDong/dev
Browse files Browse the repository at this point in the history
0.2.1 Windows support, feature selection, ...
  • Loading branch information
PanyiDong authored Apr 30, 2022
2 parents 9714e3e + 657e196 commit b1399da
Show file tree
Hide file tree
Showing 26 changed files with 3,467 additions and 389 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/build-windows.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# CI workflow: install the package with its "normal" extras and run the
# test suite on a Windows runner for every push and pull request.
name: build-windows
on:
  - pull_request
  - push
jobs:
  run:
    runs-on: windows-latest
    env:
      OS: windows-latest
      PYTHON: "3.9"
    steps:
      # Pin actions to released major versions instead of the moving
      # `master` branch so builds stay reproducible.
      - uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML never parses the version as a float
          # (e.g. 3.10 would otherwise become 3.1).
          python-version: "3.9"
      - name: "run tests (without neural network)"
        run: |
          pip install pytest
          pip install -e .[normal]
          pytest tests/
4 changes: 3 additions & 1 deletion My_AutoML/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Author: Panyi Dong (panyid2@illinois.edu)
-----
Last Modified: Friday, 8th April 2022 10:17:18 pm
Last Modified: Friday, 29th April 2022 2:25:44 pm
Modified By: Panyi Dong (panyid2@illinois.edu)
-----
Expand All @@ -38,6 +38,8 @@
SOFTWARE.
"""

__version__ = "0.2.0"

from ._base import no_processing, load_data
from ._utils import (
# random_guess,
Expand Down
23 changes: 20 additions & 3 deletions My_AutoML/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Author: Panyi Dong (panyid2@illinois.edu)
-----
Last Modified: Saturday, 16th April 2022 12:05:36 am
Last Modified: Saturday, 30th April 2022 12:49:24 pm
Modified By: Panyi Dong (panyid2@illinois.edu)
-----
Expand All @@ -38,6 +38,7 @@
SOFTWARE.
"""

import os
import glob
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -191,7 +192,15 @@ def _main(self, path, filename):
warnings.warn("No .asc file found!")
elif _csv_files + _data_files:
for _data_path in _csv_files + _data_files:
_filename = _data_path.split("/")[-1]
# in linux path, the path separator is '/'
# in windows path, the path separator is '\\'
# _filename = (
# _data_path.split("/")[-1]
# if "/" in _data_path
# else _data_path.split("\\")[-1]
# )
# use os.path.split so the path separator is handled uniformly across platforms
_filename = os.path.split(_data_path)[-1]
self.database[_filename.split(".")[0]] = pd.read_csv(_data_path)

# load .rda/.rdata files in the path
Expand Down Expand Up @@ -232,7 +241,15 @@ def _main(self, path, filename):
warnings.warn("No .rdata file found!")
elif _rda_files + _rdata_files:
for _data_path in _rda_files + _rdata_files:
_filename = _data_path.split("/")[-1]
# in linux path, the path separator is '/'
# in windows path, the path separator is '\\'
# _filename = (
# _data_path.split("/")[-1]
# if "/" in _data_path
# else _data_path.split("\\")[-1]
# )
# use os.path.split so the path separator is handled uniformly across platforms
_filename = os.path.split(_data_path)[-1]
ro.r('load("' + _data_path + '")')
ro.r("rdata = " + _filename.split(".")[0])
with localconverter(ro.default_converter + pandas2ri.converter):
Expand Down
5 changes: 4 additions & 1 deletion My_AutoML/_constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Author: Panyi Dong (panyid2@illinois.edu)
-----
Last Modified: Saturday, 23rd April 2022 11:02:02 pm
Last Modified: Wednesday, 27th April 2022 5:53:01 pm
Modified By: Panyi Dong (panyid2@illinois.edu)
-----
Expand Down Expand Up @@ -164,6 +164,9 @@
# 31 is capped by days in a month
UNI_CLASS = 31

# maximum iteration allowed for the algorithm
MAX_ITER = 1024

# LightGBM default object (metric/loss)
# binary classification
LIGHTGBM_BINARY_CLASSIFICATION = ["binary", "cross_entropy"]
Expand Down
93 changes: 71 additions & 22 deletions My_AutoML/_feature_selection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Author: Panyi Dong (panyid2@illinois.edu)
-----
Last Modified: Sunday, 24th April 2022 5:49:36 pm
Last Modified: Friday, 29th April 2022 10:37:52 am
Modified By: Panyi Dong (panyid2@illinois.edu)
-----
Expand Down Expand Up @@ -39,17 +39,6 @@
"""

from ._base import PCA_FeatureSelection, RBFSampler
from ._autosklearn import (
extra_trees_preproc_for_classification,
extra_trees_preproc_for_regression,
liblinear_svc_preprocessor,
polynomial,
select_percentile_classification,
select_percentile_regression,
select_rates_classification,
select_rates_regression,
truncatedSVD,
)

# from ._imported import (
# Densifier,
Expand Down Expand Up @@ -89,18 +78,78 @@
"FeatureFilter": FeatureFilter,
"ASFFS": ASFFS,
"GeneticAlgorithm": GeneticAlgorithm,
# from autosklearn
"extra_trees_preproc_for_classification": extra_trees_preproc_for_classification,
"extra_trees_preproc_for_regression": extra_trees_preproc_for_regression,
"liblinear_svc_preprocessor": liblinear_svc_preprocessor,
"polynomial": polynomial,
"select_percentile_classification": select_percentile_classification,
"select_percentile_regression": select_percentile_regression,
"select_rates_classification": select_rates_classification,
"select_rates_regression": select_rates_regression,
"truncatedSVD": truncatedSVD,
# "ExhaustiveFS": ExhaustiveFS, # exhaustive search is not practical, takes too long
"SFS": SFS,
"mRMR": mRMR,
"CBFS": CBFS,
}

# Import the submodule explicitly: a bare `import importlib` does not
# guarantee that `importlib.util` has been loaded.
import importlib.util

# check if autosklearn is installed, if not, use sklearn replacement
autosklearn_spec = importlib.util.find_spec("autosklearn")
sklearn_spec = importlib.util.find_spec("sklearn")

if autosklearn_spec is not None:
    # autosklearn is available: use the wrappers from ._autosklearn
    from ._autosklearn import (
        extra_trees_preproc_for_classification,
        extra_trees_preproc_for_regression,
        liblinear_svc_preprocessor,
        polynomial,
        select_percentile_classification,
        select_percentile_regression,
        select_rates_classification,
        select_rates_regression,
        truncatedSVD,
    )
elif sklearn_spec is not None:
    # autosklearn is missing but sklearn is installed: use the
    # same-named replacements from ._sklearn
    from ._sklearn import (
        extra_trees_preproc_for_classification,
        extra_trees_preproc_for_regression,
        liblinear_svc_preprocessor,
        polynomial,
        select_percentile_classification,
        select_percentile_regression,
        select_rates_classification,
        select_rates_regression,
        truncatedSVD,
    )
else:
    # neither backend is installed
    raise ImportError(
        "None of autosklearn or sklearn is installed. Please install at least one of them to use feature selection."
    )

# Register whichever implementations were imported above. Both backends
# expose the same names, so the registration is written once; the insertion
# order matches the original per-branch assignments.
feature_selections.update(
    {
        "extra_trees_preproc_for_classification": extra_trees_preproc_for_classification,
        "extra_trees_preproc_for_regression": extra_trees_preproc_for_regression,
        "liblinear_svc_preprocessor": liblinear_svc_preprocessor,
        "polynomial": polynomial,
        "select_percentile_classification": select_percentile_classification,
        "select_percentile_regression": select_percentile_regression,
        "select_rates_classification": select_rates_classification,
        "select_rates_regression": select_rates_regression,
        "truncatedSVD": truncatedSVD,
    }
)
12 changes: 6 additions & 6 deletions My_AutoML/_feature_selection/_advance.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Author: Panyi Dong (panyid2@illinois.edu)
-----
Last Modified: Sunday, 24th April 2022 10:49:15 pm
Last Modified: Friday, 29th April 2022 10:27:54 am
Modified By: Panyi Dong (panyid2@illinois.edu)
-----
Expand Down Expand Up @@ -982,7 +982,7 @@ def __init__(

self._fitted = False

def select_feature(self, X, y, selected_features, unselected_features):
def select_feature(self, X, y, estimator, selected_features, unselected_features):

# select one feature as step, get all possible combinations
test_item = list(combinations(unselected_features, 1))
Expand All @@ -993,9 +993,9 @@ def select_feature(self, X, y, selected_features, unselected_features):
results = []
for _comb in test_comb:
# fit estimator
self.estimator.fit(X.iloc[:, _comb], y)
estimator.fit(X.iloc[:, _comb], y)
# get test results
test_results = self.criteria(y, self.estimator.predict(X.iloc[:, _comb]))
test_results = self.criteria(y, estimator.predict(X.iloc[:, _comb]))
# append test results
results.append(test_results)

Expand All @@ -1022,7 +1022,7 @@ def fit(self, X, y=None):
raise ValueError("Must have response!")

# make sure estimator is recognized
self.estimator = get_estimator(self.estimator)
estimator = get_estimator(self.estimator)

# check whether n_components/n_prop is valid
if self.n_components is None and self.n_prop is None:
Expand All @@ -1045,7 +1045,7 @@ def fit(self, X, y=None):
for _ in range(self.n_components):
# get the current optimal loss and feature
loss, new_feature = self.select_feature(
X, y, selected_features, unselected_features
X, y, estimator, selected_features, unselected_features
)
if loss > optimal_loss: # if no better combination is found, stop
break
Expand Down
2 changes: 1 addition & 1 deletion My_AutoML/_feature_selection/_autosklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Author: Panyi Dong (panyid2@illinois.edu)
-----
Last Modified: Saturday, 16th April 2022 7:58:53 pm
Last Modified: Friday, 29th April 2022 10:52:32 am
Modified By: Panyi Dong (panyid2@illinois.edu)
-----
Expand Down
Loading

0 comments on commit b1399da

Please sign in to comment.