From 3632c99b949ca6e261492e4a8562e3811a2d4f1f Mon Sep 17 00:00:00 2001 From: hyunjuna Date: Mon, 5 Dec 2022 16:38:42 -0800 Subject: [PATCH 1/2] handle exceptional case in the plot_learningcurve --- machine/learn/skl_utils.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/machine/learn/skl_utils.py b/machine/learn/skl_utils.py index 2d57f0262..c17fd3836 100644 --- a/machine/learn/skl_utils.py +++ b/machine/learn/skl_utils.py @@ -377,7 +377,7 @@ def generate_results(model, input_data, model = clf.best_estimator_ else: print("param_grid else") - # plot_learning_curve(tmpdir,_id, model,features,target,cv,return_times=True) + plot_learning_curve(tmpdir,_id, model,features,target,cv,return_times=True) model.fit(features, target) @@ -514,7 +514,7 @@ def generate_results(model, input_data, # plot_pca_3d_iris(tmpdir,_id,features,target) # this - plot_tsne(tmpdir,_id,features,target) + # plot_tsne(tmpdir,_id,features,target) if type(model).__name__ == 'Pipeline': step_names = [step[0] for step in model.steps] @@ -1146,7 +1146,19 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True): # test_scores_mean = np.mean(test_scores, axis=1) # test_scores_std = np.std(test_scores, axis=1) - + # if train_sizes.tolist() has nan, then replace it with 0 + # check if train_sizes.tolist has nan + if np.isnan(train_sizes.tolist()).any(): + #replace nan with -1 + train_sizes = np.nan_to_num(train_sizes, nan=-1) + if np.isnan(train_scores.tolist()).any(): + # replace nan with -1 + train_scores = np.nan_to_num(train_scores, nan=-1) + if np.isnan(test_scores.tolist()).any(): + # replace nan with -1 + test_scores = np.nan_to_num(test_scores, nan=-1) + + # if train_scores print('train_sizes.tolist()',train_sizes.tolist()) print('train_scores', train_scores.tolist()) print('test_scores', test_scores.tolist()) From 394f0996184b22234542fd5dfcaf28fa5d632381 Mon Sep 17 00:00:00 2001 From: hyunjuna Date: Mon, 5 Dec 2022 17:13:12 -0800 Subject: [PATCH 2/2] update to deal with the case when wrong hyper-parameters are given for learning curve --- machine/learn/skl_utils.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/machine/learn/skl_utils.py b/machine/learn/skl_utils.py index c17fd3836..833a6dc04 100644 --- a/machine/learn/skl_utils.py +++ b/machine/learn/skl_utils.py @@ -1094,7 +1094,8 @@ def plot_imp_score(tmpdir, _id, coefs, feature_names, imp_score_type): def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True): - + # Plot learning curve + print("Plotting learning curve...") features = np.array(features) @@ -1147,14 +1148,15 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True): # test_scores_std = np.std(test_scores, axis=1) # if train_sizes.tolist() has nan, then replace it with 0 - # check if train_sizes.tolist has nan - if np.isnan(train_sizes.tolist()).any(): + # check if all of train_sizes.tolist has nan + + if np.isnan(train_sizes.tolist()).all(): #replace nan with -1 train_sizes = np.nan_to_num(train_sizes, nan=-1) - if np.isnan(train_scores.tolist()).any(): + if np.isnan(train_scores.tolist()).all(): # replace nan with -1 train_scores = np.nan_to_num(train_scores, nan=-1) - if np.isnan(test_scores.tolist()).any(): + if np.isnan(test_scores.tolist()).all(): # replace nan with -1 test_scores = np.nan_to_num(test_scores, nan=-1) @@ -1174,6 +1176,8 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True): def plot_pca_2d(tmpdir,_id,features,target): + + print("plot_pca_2d") # import numpy as np # import matplotlib.pyplot as plt @@ -1475,6 +1479,8 @@ def plot_tsne(tmpdir,_id,features,target): # print(X) # print(y) + print("Plotting t-SNE") + tsne = TSNE(n_components=2, verbose=1, random_state=123) X_2d = tsne.fit_transform(X) @@ -1510,7 +1516,12 @@ def plot_tsne(tmpdir,_id,features,target): # path = tmpdir + _id + '/tsneJson_' + _id + '.json' # import json + # X_2d + print("X_2d",X_2d) + print("y",y) + X_2d = [1] + y = [1] # save X and y to json file tsne_dict = {