Merge pull request #395 from HyunjunA/infvisfrontendmlbackend

Add interactive t-sne and pca
EpistasisLab · Nov 21, 2022 · b11ca6b · b11ca6b
2 parents 4c05859 + 39dce70
commit b11ca6b
Show file tree

Hide file tree

Showing 272 changed files with 195,504 additions and 899 deletions.
diff --git a/README.md b/README.md
@@ -21,9 +21,8 @@ Browse the repo:
 About the Project
 =================
 
-Aliro is actively developed by the [Institute for Biomedical Informatics](http://upibi.org) at the University of Pennsylvania.
-Contributors include Heather Williams, Weixuan Fu, William La Cava, Josh Cohen,
-Steve Vitale, Sharon Tartarone, Randal Olson, Patryk Orzechowski, and Jason Moore. 
+Aliro is actively developed by the Center for Artificial Intelligence Research (CAIR) in the [Department of Computational Biomedicine](https://www.cedars-sinai.edu/research/departments-institutes/computational-biomedicine.html) at [Cedars-Sinai Medical Center](https://www.cedars-sinai.org/) in Los Angeles.
+Contributors include Hyunjun Choi, Miguel Hernandez, Nick Matsumoto, Jay Moran, Paul Wang, and Jason Moore (PI).
 
 Cite
 ====

diff --git a/ai/sklearn/config/classifiers.py b/ai/sklearn/config/classifiers.py
@@ -1,5 +1,6 @@
 classifier_config_dict = {
 
+    # Original six classifiers
     'sklearn.tree.DecisionTreeClassifier': {
         'params': {
             'criterion': ["gini", "entropy"],
@@ -75,5 +76,74 @@
             'bootstrap': [True, False],
             'min_weight_fraction_leaf': [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45]
         }
-    }
+    },
+
+
+
+
+
+    # New classifiers (currently disabled: every entry below is commented out)
+    # 'sklearn.ensemble.AdaBoostClassifier': {
+    #     'params': {
+    #         'n_estimators': [100, 500],
+    #         'learning_rate': [0.01, 0.1, 1],
+    #         'algorithm': ["SAMME", "SAMME.R"]
+    #     }
+    # },
+
+
+    # 'sklearn.cluster.KMeans': {
+    #     'params': {
+    #         'n_clusters': [2, 3, 4, 5, 6, 7, 8, 9, 10],
+    #         'init': ["k-means++", "random"],
+    #         'n_init': [10, 20, 30],
+    #         'max_iter': [100, 200, 300, 400, 500],
+    #         'tol': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
+    #     }
+    # },
+
+    # 'sklearn.naive_bayes.GaussianNB': {
+    #     'params': {
+    #         'var_smoothing': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
+    #     }
+    # },
+
+    # 'sklearn.naive_bayes.MultinomialNB': {
+    #     'params': {
+    #         'alpha': [0.0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
+    #         'fit_prior': [True, False]
+    #     }
+    # },
+
+    # 'sklearn.naive_bayes.BernoulliNB': {
+    #     'params': {
+    #         'alpha': [0.0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
+    #         'fit_prior': [True, False]
+    #     }
+    # },
+
+    # 'sklearn.neural_network.MLPClassifier': {
+    #     'params': {
+    #         'hidden_layer_sizes': [(100,), (100, 100), (100, 100, 100)],
+    #         'activation': ["identity", "logistic", "tanh", "relu"],
+    #         'solver': ["lbfgs", "sgd", "adam"],
+    #         'alpha': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
+    #         'learning_rate': ["constant", "invscaling", "adaptive"],
+    #         'learning_rate_init': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
+    #         'power_t': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+    #         'max_iter': [100, 500, 1000, 2000, 5000, 10000],
+    #         'tol': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
+    #         'momentum': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+    #         'nesterovs_momentum': [True, False],
+    #         'early_stopping': [True, False],
+    #         'beta_1': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+    #         'beta_2': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+    #         'epsilon': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
+    #         'validation_fraction': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+    #         'n_iter_no_change': [5, 10, 20, 50, 100]
+    #     }
+    # }
+
+
+
 }
diff --git a/config/common.env b/config/common.env
@@ -18,7 +18,7 @@ MACHINE_HOST=machine
 MACHINE_CONFIG=/appsrc/config/machine_config.json
 MACHINE_SHAP_SAMPLES_KERNEL_EXPLAINER=50
 MACHINE_SHAP_SAMPLES_OTHER_EXPLAINER=100
-EXP_TIMEOUT=10
+EXP_TIMEOUT=100
 DT_MAX_DEPTH=6
 
 STARTUP_DATASET_PATH=/appsrc/data/datasets/user
diff --git a/config/machine_config.json b/config/machine_config.json
@@ -1,3 +1,6 @@
+
+
+
 {
   "algorithms": ["DecisionTreeClassifier",
                 "GradientBoostingClassifier",
@@ -10,5 +13,14 @@
                 "SVR",
                 "KNeighborsRegressor",
                 "KernelRidge",
-                "RandomForestRegressor"]
+                "RandomForestRegressor",
+                "AdaBoostClassifier"
+                ,"KMeans"
+                ,"GaussianNB"
+                ,"MultinomialNB"
+                ,"BernoulliNB"
+                ,"MLPClassifier"
+              ]
 }
+
+
diff --git a/data/datasets/pmlb_small/README.md b/data/datasets/pmlb_small/README.md
@@ -0,0 +1,9 @@
+# Benchmark data sets
+
+This directory contains over 150 data sets for benchmarking supervised machine learning algorithms.
+
+Each subdirectory corresponds to a separate data set and contains a README file with basic information about that data set.
+
+# High-level summary of data sets
+
+[in progress]
diff --git a/data/datasets/pmlb_small/allbp/README.md b/data/datasets/pmlb_small/allbp/README.md
@@ -0,0 +1,80 @@
+# allbp
+
+## Summary Stats
+
+#instances: 3772
+
+#features: 29
+
+  #binary_features: 21
+
+  #integer_features: 8
+
+  #float_features: 0
+
+Endpoint type: integer
+
+#Classes: 3
+
+Imbalance metric: 0.8755228428707819
+
+## Feature Types
+
+age:discrete
+
+sex:discrete
+
+on thyroxine:binary
+
+query on thyroxine:binary
+
+on antithyroid medication:binary
+
+sick:binary
+
+pregnant:binary
+
+thyroid surgery:binary
+
+I131 treatment:binary
+
+query hypothyroid:binary
+
+query hyperthyroid:binary
+
+lithium:binary
+
+goitre:binary
+
+tumor:binary
+
+hypopituitary:binary
+
+psych:binary
+
+TSH measured:binary
+
+TSH:discrete
+
+T3 measured:binary
+
+T3:discrete
+
+TT4 measured:binary
+
+TT4:discrete
+
+T4U measured:binary
+
+T4U:discrete
+
+FTI measured:binary
+
+FTI:discrete
+
+TBG measured:binary
+
+TBG:binary
+
+referral source:discrete
+