Merge pull request #15 from PanyiDong/dev

0.2.1 Windows support, feature selection, ...
PanyiDong · Apr 30, 2022 · b1399da · b1399da
2 parents 9714e3e + 657e196
commit b1399da
Show file tree

Hide file tree

Showing 26 changed files with 3,467 additions and 389 deletions.
diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml
@@ -0,0 +1,21 @@
+name: build-windows
+on:
+  - pull_request
+  - push
+jobs:
+  run:
+    runs-on: windows-latest
+    env:
+      OS: windows-latest
+      PYTHON: "3.9"
+    steps:
+      - uses: actions/checkout@master
+      - name: Setup Python
+        uses: actions/setup-python@master
+        with:
+          python-version: 3.9
+      - name: "run tests (without neural network)"
+        run: |
+          pip install pytest
+          pip install -e .[normal]
+          pytest tests/
diff --git a/My_AutoML/__init__.py b/My_AutoML/__init__.py
@@ -11,7 +11,7 @@
 Author: Panyi Dong (panyid2@illinois.edu)
 
 -----
-Last Modified: Friday, 8th April 2022 10:17:18 pm
+Last Modified: Friday, 29th April 2022 2:25:44 pm
 Modified By: Panyi Dong (panyid2@illinois.edu)
 
 -----
@@ -38,6 +38,8 @@
 SOFTWARE.
 """
 
+__version__ = "0.2.0"
+
 from ._base import no_processing, load_data
 from ._utils import (
     # random_guess,

diff --git a/My_AutoML/_base.py b/My_AutoML/_base.py
@@ -11,7 +11,7 @@
 Author: Panyi Dong (panyid2@illinois.edu)
 
 -----
-Last Modified: Saturday, 16th April 2022 12:05:36 am
+Last Modified: Saturday, 30th April 2022 12:49:24 pm
 Modified By: Panyi Dong (panyid2@illinois.edu)
 
 -----
@@ -38,6 +38,7 @@
 SOFTWARE.
 """
 
+import os
 import glob
 import numpy as np
 import pandas as pd
@@ -191,7 +192,15 @@ def _main(self, path, filename):
                 warnings.warn("No .asc file found!")
             elif _csv_files + _data_files:
                 for _data_path in _csv_files + _data_files:
-                    _filename = _data_path.split("/")[-1]
+                    # in linux path, the path separator is '/'
+                    # in windows path, the path separator is '\\'
+                    # _filename = (
+                    #     _data_path.split("/")[-1]
+                    #     if "/" in _data_path
+                    #     else _data_path.split("\\")[-1]
+                    # )
+                    # use os.path.split to handle path separators uniformly across platforms
+                    _filename = os.path.split(_data_path)[-1]
                     self.database[_filename.split(".")[0]] = pd.read_csv(_data_path)
 
         # load .rda/.rdata files in the path
@@ -232,7 +241,15 @@ def _main(self, path, filename):
                     warnings.warn("No .rdata file found!")
                 elif _rda_files + _rdata_files:
                     for _data_path in _rda_files + _rdata_files:
-                        _filename = _data_path.split("/")[-1]
+                        # in linux path, the path separator is '/'
+                        # in windows path, the path separator is '\\'
+                        # _filename = (
+                        #     _data_path.split("/")[-1]
+                        #     if "/" in _data_path
+                        #     else _data_path.split("\\")[-1]
+                        # )
+                        # use os.path.split to handle path separators uniformly across platforms
+                        _filename = os.path.split(_data_path)[-1]
                         ro.r('load("' + _data_path + '")')
                         ro.r("rdata = " + _filename.split(".")[0])
                         with localconverter(ro.default_converter + pandas2ri.converter):

diff --git a/My_AutoML/_constant.py b/My_AutoML/_constant.py
@@ -11,7 +11,7 @@
 Author: Panyi Dong (panyid2@illinois.edu)
 
 -----
-Last Modified: Saturday, 23rd April 2022 11:02:02 pm
+Last Modified: Wednesday, 27th April 2022 5:53:01 pm
 Modified By: Panyi Dong (panyid2@illinois.edu)
 
 -----
@@ -164,6 +164,9 @@
 # 31 is capped by days in a month
 UNI_CLASS = 31
 
+# maximum iteration allowed for the algorithm
+MAX_ITER = 1024
+
 # LightGBM default object (metric/loss)
 # binary classification
 LIGHTGBM_BINARY_CLASSIFICATION = ["binary", "cross_entropy"]

diff --git a/My_AutoML/_feature_selection/__init__.py b/My_AutoML/_feature_selection/__init__.py
@@ -11,7 +11,7 @@
 Author: Panyi Dong (panyid2@illinois.edu)
 
 -----
-Last Modified: Sunday, 24th April 2022 5:49:36 pm
+Last Modified: Friday, 29th April 2022 10:37:52 am
 Modified By: Panyi Dong (panyid2@illinois.edu)
 
 -----
@@ -39,17 +39,6 @@
 """
 
 from ._base import PCA_FeatureSelection, RBFSampler
-from ._autosklearn import (
-    extra_trees_preproc_for_classification,
-    extra_trees_preproc_for_regression,
-    liblinear_svc_preprocessor,
-    polynomial,
-    select_percentile_classification,
-    select_percentile_regression,
-    select_rates_classification,
-    select_rates_regression,
-    truncatedSVD,
-)
 
 # from ._imported import (
 #     Densifier,
@@ -89,18 +78,78 @@
     "FeatureFilter": FeatureFilter,
     "ASFFS": ASFFS,
     "GeneticAlgorithm": GeneticAlgorithm,
-    # from autosklearn
-    "extra_trees_preproc_for_classification": extra_trees_preproc_for_classification,
-    "extra_trees_preproc_for_regression": extra_trees_preproc_for_regression,
-    "liblinear_svc_preprocessor": liblinear_svc_preprocessor,
-    "polynomial": polynomial,
-    "select_percentile_classification": select_percentile_classification,
-    "select_percentile_regression": select_percentile_regression,
-    "select_rates_classification": select_rates_classification,
-    "select_rates_regression": select_rates_regression,
-    "truncatedSVD": truncatedSVD,
     # "ExhaustiveFS": ExhaustiveFS, # exhaustive search is not practical, takes too long
     "SFS": SFS,
     "mRMR": mRMR,
     "CBFS": CBFS,
 }
+
+import importlib
+
+# check if autosklearn is installed, if not, use sklearn replacement
+autosklearn_spec = importlib.util.find_spec("autosklearn")
+sklearn_spec = importlib.util.find_spec("sklearn")
+
+if autosklearn_spec is not None:
+    from ._autosklearn import (
+        extra_trees_preproc_for_classification,
+        extra_trees_preproc_for_regression,
+        liblinear_svc_preprocessor,
+        polynomial,
+        select_percentile_classification,
+        select_percentile_regression,
+        select_rates_classification,
+        select_rates_regression,
+        truncatedSVD,
+    )
+
+    # from autosklearn
+    feature_selections[
+        "extra_trees_preproc_for_classification"
+    ] = extra_trees_preproc_for_classification
+    feature_selections[
+        "extra_trees_preproc_for_regression"
+    ] = extra_trees_preproc_for_regression
+    feature_selections["liblinear_svc_preprocessor"] = liblinear_svc_preprocessor
+    feature_selections["polynomial"] = polynomial
+    feature_selections[
+        "select_percentile_classification"
+    ] = select_percentile_classification
+    feature_selections["select_percentile_regression"] = select_percentile_regression
+    feature_selections["select_rates_classification"] = select_rates_classification
+    feature_selections["select_rates_regression"] = select_rates_regression
+    feature_selections["truncatedSVD"] = truncatedSVD
+# otherwise, if sklearn is not installed either, raise an error
+elif sklearn_spec is None:
+    raise ImportError(
+        "None of autosklearn or sklearn is installed. Please install at least one of them to use feature selection."
+    )
+else:
+    from ._sklearn import (
+        extra_trees_preproc_for_classification,
+        extra_trees_preproc_for_regression,
+        liblinear_svc_preprocessor,
+        polynomial,
+        select_percentile_classification,
+        select_percentile_regression,
+        select_rates_classification,
+        select_rates_regression,
+        truncatedSVD,
+    )
+
+    # from sklearn (replacements for the autosklearn implementations)
+    feature_selections[
+        "extra_trees_preproc_for_classification"
+    ] = extra_trees_preproc_for_classification
+    feature_selections[
+        "extra_trees_preproc_for_regression"
+    ] = extra_trees_preproc_for_regression
+    feature_selections["liblinear_svc_preprocessor"] = liblinear_svc_preprocessor
+    feature_selections["polynomial"] = polynomial
+    feature_selections[
+        "select_percentile_classification"
+    ] = select_percentile_classification
+    feature_selections["select_percentile_regression"] = select_percentile_regression
+    feature_selections["select_rates_classification"] = select_rates_classification
+    feature_selections["select_rates_regression"] = select_rates_regression
+    feature_selections["truncatedSVD"] = truncatedSVD
diff --git a/My_AutoML/_feature_selection/_advance.py b/My_AutoML/_feature_selection/_advance.py
@@ -11,7 +11,7 @@
 Author: Panyi Dong (panyid2@illinois.edu)
 
 -----
-Last Modified: Sunday, 24th April 2022 10:49:15 pm
+Last Modified: Friday, 29th April 2022 10:27:54 am
 Modified By: Panyi Dong (panyid2@illinois.edu)
 
 -----
@@ -982,7 +982,7 @@ def __init__(
 
         self._fitted = False
 
-    def select_feature(self, X, y, selected_features, unselected_features):
+    def select_feature(self, X, y, estimator, selected_features, unselected_features):
 
         # select one feature as step, get all possible combinations
         test_item = list(combinations(unselected_features, 1))
@@ -993,9 +993,9 @@ def select_feature(self, X, y, selected_features, unselected_features):
         results = []
         for _comb in test_comb:
             # fit estimator
-            self.estimator.fit(X.iloc[:, _comb], y)
+            estimator.fit(X.iloc[:, _comb], y)
             # get test results
-            test_results = self.criteria(y, self.estimator.predict(X.iloc[:, _comb]))
+            test_results = self.criteria(y, estimator.predict(X.iloc[:, _comb]))
             # append test results
             results.append(test_results)
 
@@ -1022,7 +1022,7 @@ def fit(self, X, y=None):
             raise ValueError("Must have response!")
 
         # make sure estimator is recognized
-        self.estimator = get_estimator(self.estimator)
+        estimator = get_estimator(self.estimator)
 
         # check whether n_components/n_prop is valid
         if self.n_components is None and self.n_prop is None:
@@ -1045,7 +1045,7 @@ def fit(self, X, y=None):
         for _ in range(self.n_components):
             # get the current optimal loss and feature
             loss, new_feature = self.select_feature(
-                X, y, selected_features, unselected_features
+                X, y, estimator, selected_features, unselected_features
             )
             if loss > optimal_loss:  # if no better combination is found, stop
                 break

diff --git a/My_AutoML/_feature_selection/_autosklearn.py b/My_AutoML/_feature_selection/_autosklearn.py
@@ -11,7 +11,7 @@
 Author: Panyi Dong (panyid2@illinois.edu)
 
 -----
-Last Modified: Saturday, 16th April 2022 7:58:53 pm
+Last Modified: Friday, 29th April 2022 10:52:32 am
 Modified By: Panyi Dong (panyid2@illinois.edu)
 
 -----