EpistasisLab · HyunjunA · Dec 20, 2022 · Dec 20, 2022 · Dec 20, 2022 · Dec 20, 2022
diff --git a/docs/source/api.rst b/docs/source/api.rst
@@ -136,6 +136,12 @@ These methods generate sklearn models and evaluate them.
 
 .. autofunction:: machine.learn.skl_utils.plot_confusion_matrix
 
+.. autofunction:: machine.learn.skl_utils.plot_learning_curve
+
+.. autofunction:: machine.learn.skl_utils.plot_pca_2d
+
+.. autofunction:: machine.learn.skl_utils.plot_tsne_2d
+
 .. autofunction:: machine.learn.skl_utils.plot_roc_curve
 
 .. autofunction:: machine.learn.skl_utils.plot_imp_score

diff --git a/machine/learn/skl_utils.py b/machine/learn/skl_utils.py
@@ -50,6 +50,10 @@
 import matplotlib.colors as mcolors
 from matplotlib.patches import Patch
 from sklearn.manifold import TSNE
+from sklearn.model_selection import learning_curve
+
+
+
 
 mpl.use('Agg')
 
@@ -503,7 +507,7 @@ def generate_results(model, input_data,
         plot_pca_2d(tmpdir,_id,features,target)
         # plot_pca_3d(tmpdir,_id,features,target)
         # plot_pca_3d_iris(tmpdir,_id,features,target)
-        plot_tsne(tmpdir,_id,features,target)
+        plot_tsne_2d(tmpdir,_id,features,target)
 
         if type(model).__name__ == 'Pipeline':
             step_names = [step[0] for step in model.steps]
@@ -1082,10 +1086,27 @@ def plot_imp_score(tmpdir, _id, coefs, feature_names, imp_score_type):
     return top_features, indices
 
 def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
+    """Plot the learning curve of a model and save it as a PNG image.
+
+    Parameters
+    ----------
+    tmpdir: string
+        Temporary directory for saving experiment results
+    _id: string
+        Experiment ID in Aliro
+    model: user specified model
+    features: np.ndarray/pd.DataFrame
+        Features in training dataset
+    target: np.ndarray/pd.DataFrame
+        Target in training dataset
+    cv: int, cross-validation generator or an iterable
 
-    from sklearn.model_selection import learning_curve
-    from matplotlib import pyplot as plt
-    import numpy as np
+    Returns
+    -------
+    None
+    """
+
+
 
 
     features = np.array(features)
@@ -1094,7 +1115,6 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
     target[target == -1] = 0
 
 
-
     train_sizes, train_scores, test_scores, fit_times, _ = learning_curve(model,features,target,None, np.linspace(0.1, 1.0, 5), cv,return_times=True)
 
     plt.xlabel("Training examples")
@@ -1108,9 +1128,6 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
 
     plt.grid()
 
-    # print('train_scores_mean',train_scores_mean)
-    # print('test_scores_mean',test_scores_mean)
-    # print('train_sizes',train_sizes)
 
     plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                      train_scores_mean + train_scores_std, alpha=0.1,
@@ -1126,16 +1143,12 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
     plt.title('Learning curve')
 
     plt.legend(loc='best')
-    # plt.legend(loc="lower right")
     plt.savefig(tmpdir + _id + '/learning_curve_' + _id + '.png')
 
 
     plt.close()
 
-    # train_scores_mean = np.mean(train_scores, axis=1)
-    # train_scores_std = np.std(train_scores, axis=1)
-    # test_scores_mean = np.mean(test_scores, axis=1)
-    # test_scores_std = np.std(test_scores, axis=1)
+
 
     if np.isnan(train_sizes.tolist()).all():
         #replace nan with -1
@@ -1158,26 +1171,31 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
 
 
 def plot_pca_2d(tmpdir,_id,features,target):
-    # import numpy as np
-    # import matplotlib.pyplot as plt
-
-
-    # from sklearn import decomposition
-    # import matplotlib.colors as mcolors
-    # from matplotlib.patches import Patch
+    """Plot a 2D PCA projection of the features, colored by class.
 
+    Parameters
+    ----------
+    tmpdir: string
+        Temporary directory for saving the 2D PCA plot and JSON file
+    _id: string
+        Experiment ID in Aliro
+
+    features: np.ndarray/pd.DataFrame
+        Features in training dataset
+    target: np.ndarray/pd.DataFrame
+        Target in training dataset
 
-    # from sklearn import datasets
-
-    # np.random.seed(5)
-
-    # iris = datasets.load_iris()
-    # print(features)
+    Returns
+    -------
+    None
+    """
     X = np.array(features)
     y = np.array(target)
 
     print(set(y))
 
+
+
 
 
 
@@ -1188,59 +1206,23 @@ def plot_pca_2d(tmpdir,_id,features,target):
     pca.fit(X)
     X = pca.transform(X)
 
-    # plt.scatter(x,y, c = z, cmap = mcolors.ListedColormap(["black", "green"]))
-
-    # plt.show()
-
-
-    # version 1 
-    # colors = np.array(["black", "green"])
-    # plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1, edgecolor='k')
 
 
-
-    # version 2
     num_classes = len(set(y))
     # generate the number of colors equal to the number of classes
     colors = plt.cm.Set1(np.linspace(0, 1, num_classes))
 
     plt.scatter(X[:, 0], X[:, 1], c=y, cmap=mcolors.ListedColormap(colors))
     # plot the legend where the colors are mapped to the classes
     plt.legend(handles=[Patch(color=colors[i], label="class_"+str(i)) for i in range(num_classes)])
-
-    # cb = plt.colorbar()
-    # loc = np.arange(0,max(label),max(label)/float(len(colors)))
-    # cb.set_ticks(loc)
-    # cb.set_ticklabels(colors)
-
-
-
 
 
     # write x axis as pc1 and y axis as pc2
     plt.xlabel('PC1')
     plt.ylabel('PC2')
 
-
-
-
-    # print("X")
-    # print(X)
-
-
-    # ax.w_xaxis.set_ticklabels([])
-    # ax.w_yaxis.set_ticklabels([])
-    # ax.w_zaxis.set_ticklabels([])
-
-    # plt.show()
     plt.savefig(tmpdir + _id + '/pca_' + _id + '.png')
-    plt.close()
-
-
-
-    path = tmpdir + _id + '/pcaJson_' + _id + '.json'
-    import json
-
+    plt.close()    
 
 
     # save X and y to json file
@@ -1249,17 +1231,11 @@ def plot_pca_2d(tmpdir,_id,features,target):
         'y_pca': y.tolist()
     }
 
-    # with open(tmpdir + _id + '/p-c-a-Json_' + _id + '.json', 'w') as f:
-    #     json.dump(pca_dict, f)
-
-    # with open(tmpdir + _id + '/aaachoi_' + _id + '.json', 'w') as f:
-    #     json.dump(pca_dict, f)
 
+    # save json file
     save_json_fmt(outdir=tmpdir, _id=_id,
                   fname="pca-json.json", content=pca_dict)
 
-    # 
-    # save pca_dict to json file with the path
 
 
 def plot_pca_3d(tmpdir,_id,features,target):
@@ -1440,48 +1416,41 @@ def plot_pca_3d_iris(tmpdir,_id,features,target):
     plt.savefig(tmpdir + _id + '/pca_' + _id + '.png')
     plt.close()
 
-def plot_tsne(tmpdir,_id,features,target):
+def plot_tsne_2d(tmpdir,_id,features,target):
 
-    # import numpy as np
-    # import matplotlib.pyplot as plt
-    # from sklearn.manifold import TSNE
-
-    # X = np.array([[1, 1], [2, 1], [1, 0],
-    #               [4, 7], [3, 5], [3, 6]])
-    # y = np.array([0, 0, 0, 1, 1, 1])
+    """Plot a 2D t-SNE embedding of the features, colored by class.
 
-    # tsne = TSNE(n_components=2, random_state=0)
-    # X_2d = tsne.fit_transform(X)
+    Parameters
+    ----------
+    tmpdir: string
+        Temporary directory for saving the 2D t-SNE plot and JSON file
+    _id: string
+        Experiment ID in Aliro
+
+    features: np.ndarray/pd.DataFrame
+        Features in training dataset
+    target: np.ndarray/pd.DataFrame
+        Target in training dataset
 
-    # plt.scatter(X_2d[:, 0], X_2d[:, 1])
-    # plt.show()
 
-
+    Returns
+    -------
+    None
+    """
 
-    # X = np.array([[1, 1], [2, 1], [1, 0],
-    #               [4, 7], [3, 5], [3, 6]])
-    # y = np.array([0, 0, 0, 1, 1, 1])
 
-    X = features
-    y = target
 
-    # print(X)
-    # print(y)
 
     tsne = TSNE(n_components=2, verbose=1, random_state=123)
-    X_2d = tsne.fit_transform(X)
+    X_2d = tsne.fit_transform(features)
 
-    # df = pd.DataFrame()
-    # df["y"] = y
-    # df["comp-1"] = X_2d[:,0]
-    # df["comp-2"] = X_2d[:,1]
 
     # version 2
-    num_classes = len(set(y))
+    num_classes = len(set(target))
     # generate the number of colors equal to the number of classes
     colors = plt.cm.Set1(np.linspace(0, 1, num_classes))
 
-    plt.scatter(X_2d[:,0], X_2d[:,1], c=y, cmap=mcolors.ListedColormap(colors))
+    plt.scatter(X_2d[:,0], X_2d[:,1], c=target, cmap=mcolors.ListedColormap(colors))
     # plot the legend where the colors are mapped to the classes
     plt.legend(handles=[Patch(color=colors[i], label="class_"+str(i)) for i in range(num_classes)])
 
@@ -1499,32 +1468,16 @@ def plot_tsne(tmpdir,_id,features,target):
 
 
 
-
-    # path = tmpdir + _id + '/tsneJson_' + _id + '.json'
-    import json
-
-
-
     # save X and y to json file
     tsne_dict = {
         'X_tsne': X_2d.tolist(),
-        'y_tsne': y.tolist()
+        'y_tsne': target.tolist()
     }
 
-    # print('tsne_dict',tsne_dict)
-
-    # with open(tmpdir + _id + '/t-sne-Json_' + _id + '.json', 'w') as f:
-    #     json.dump(tsne_dict, f)
-    # with open(tmpdir + _id + '/wwwchoi_' + _id + '.json', 'w') as f:
-    #     json.dump(tsne_dict, f)
 
     save_json_fmt(outdir=tmpdir, _id=_id,
                   fname="tsne-json.json", content=tsne_dict)
-
-    # save_json_fmt(outdir=tmpdir, _id=_id,
-    #               fname="value.json", content=metrics_dict)
-
-
+
 
 
 

diff --git a/raspberrypi/productpage/css/style.css b/raspberrypi/productpage/css/style.css
@@ -480,7 +480,7 @@ to { opacity: 0; }
 }
 
 
-a#Downloadpage{
+a#Downloadpage, a#installationpage{
 
   color:#e3085d!important;
   /* visibility: hidden; */

diff --git a/raspberrypi/productpage/data/datasets/pmlb_small/iris/README.md b/raspberrypi/productpage/data/datasets/pmlb_small/iris/README.md
@@ -0,0 +1,30 @@
+# iris
+
+## Summary Stats
+
+#instances: 150
+
+#features: 4
+
+  #binary_features: 0
+
+  #integer_features: 0
+
+  #float_features: 4
+
+Endpoint type: integer
+
+#Classes: 3
+
+Imbalance metric: 0.0
+
+## Feature Types
+
+sepal-length:continuous
+
+sepal-width:continuous
+
+petal-length:continuous
+
+petal-width:continuous
+