From 3632c99b949ca6e261492e4a8562e3811a2d4f1f Mon Sep 17 00:00:00 2001
From: hyunjuna <hyunjun.choi@cshs.org>
Date: Mon, 5 Dec 2022 16:38:42 -0800
Subject: [PATCH 1/2] Handle NaN values in plot_learning_curve

---
 machine/learn/skl_utils.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/machine/learn/skl_utils.py b/machine/learn/skl_utils.py
index 2d57f0262..c17fd3836 100644
--- a/machine/learn/skl_utils.py
+++ b/machine/learn/skl_utils.py
@@ -377,7 +377,7 @@ def generate_results(model, input_data,
             model = clf.best_estimator_
         else:
             print("param_grid else")
-            # plot_learning_curve(tmpdir,_id, model,features,target,cv,return_times=True)
+            plot_learning_curve(tmpdir,_id, model,features,target,cv,return_times=True)
             model.fit(features, target)
 
         
@@ -514,7 +514,7 @@ def generate_results(model, input_data,
         # plot_pca_3d_iris(tmpdir,_id,features,target)
         
         # this
-        plot_tsne(tmpdir,_id,features,target)
+        # plot_tsne(tmpdir,_id,features,target)
 
         if type(model).__name__ == 'Pipeline':
             step_names = [step[0] for step in model.steps]
@@ -1146,7 +1146,19 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
     # test_scores_mean = np.mean(test_scores, axis=1)
     # test_scores_std = np.std(test_scores, axis=1)
 
-
+    # if train_sizes.tolist() has nan, then replace it with 0
+    # check if train_sizes.tolist has nan
+    if np.isnan(train_sizes.tolist()).any():
+        #replace nan with -1
+        train_sizes = np.nan_to_num(train_sizes, nan=-1)
+    if np.isnan(train_scores.tolist()).any():
+        # replace nan with -1
+        train_scores = np.nan_to_num(train_scores, nan=-1)
+    if np.isnan(test_scores.tolist()).any():
+        # replace nan with -1
+        test_scores = np.nan_to_num(test_scores, nan=-1)
+
+    # if train_scores
     print('train_sizes.tolist()',train_sizes.tolist())
     print('train_scores', train_scores.tolist())
     print('test_scores', test_scores.tolist())

From 394f0996184b22234542fd5dfcaf28fa5d632381 Mon Sep 17 00:00:00 2001
From: hyunjuna <hyunjun.choi@cshs.org>
Date: Mon, 5 Dec 2022 17:13:12 -0800
Subject: [PATCH 2/2] Handle the case where invalid hyper-parameters are
 given for the learning curve

---
 machine/learn/skl_utils.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/machine/learn/skl_utils.py b/machine/learn/skl_utils.py
index c17fd3836..833a6dc04 100644
--- a/machine/learn/skl_utils.py
+++ b/machine/learn/skl_utils.py
@@ -1094,7 +1094,8 @@ def plot_imp_score(tmpdir, _id, coefs, feature_names, imp_score_type):
 
 def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
 
-
+    # Plot learning curve
+    print("Plotting learning curve...")
 
     
     features = np.array(features)
@@ -1147,14 +1148,15 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
     # test_scores_std = np.std(test_scores, axis=1)
 
     # if train_sizes.tolist() has nan, then replace it with 0
-    # check if train_sizes.tolist has nan
-    if np.isnan(train_sizes.tolist()).any():
+    # check if all of train_sizes.tolist has nan
+
+    if np.isnan(train_sizes.tolist()).all():
         #replace nan with -1
         train_sizes = np.nan_to_num(train_sizes, nan=-1)
-    if np.isnan(train_scores.tolist()).any():
+    if np.isnan(train_scores.tolist()).all():
         # replace nan with -1
         train_scores = np.nan_to_num(train_scores, nan=-1)
-    if np.isnan(test_scores.tolist()).any():
+    if np.isnan(test_scores.tolist()).all():
         # replace nan with -1
         test_scores = np.nan_to_num(test_scores, nan=-1)
 
@@ -1174,6 +1176,8 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
 
 
 def plot_pca_2d(tmpdir,_id,features,target):
+
+    print("plot_pca_2d")
     # import numpy as np
     # import matplotlib.pyplot as plt
 
@@ -1475,6 +1479,8 @@ def plot_tsne(tmpdir,_id,features,target):
     # print(X)
     # print(y)
 
+    print("Plotting t-SNE")
+
     tsne = TSNE(n_components=2, verbose=1, random_state=123)
     X_2d = tsne.fit_transform(X)
 
@@ -1510,7 +1516,12 @@ def plot_tsne(tmpdir,_id,features,target):
     # path = tmpdir + _id + '/tsneJson_' + _id + '.json'
     # import json
     
+    # X_2d
+    print("X_2d",X_2d)
+    print("y",y)
 
+    X_2d = [1]
+    y = [1]
 
     # save X and y to json file
     tsne_dict = {