diff --git a/.gitignore b/.gitignore
index ac87875f7..f693044b7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -63,4 +63,6 @@ MANIFEST
.xz
*.exe
*.mp4
-
+package-lock.json
+package.json
+package-copy.json
\ No newline at end of file
diff --git a/lab/webapp/src/components/ConfusionMatrixJSONRender/index copy later.jsx b/lab/webapp/src/components/ConfusionMatrixJSONRender/index copy later.jsx
index 2c0d45d47..de693fddf 100644
--- a/lab/webapp/src/components/ConfusionMatrixJSONRender/index copy later.jsx
+++ b/lab/webapp/src/components/ConfusionMatrixJSONRender/index copy later.jsx
@@ -196,7 +196,7 @@ import d3 from 'd3';
// test version
class TestLineChart extends Component {
componentDidMount() {
- console.log("hello");
+
// train_sizes={train_sizes}
// train_scores={train_scores}
// test_scores={test_scores}
diff --git a/lab/webapp/src/components/ConfusionMatrixJSONRender/index copy.jsx b/lab/webapp/src/components/ConfusionMatrixJSONRender/index copy.jsx
index 979e00edc..91cfe2fb1 100644
--- a/lab/webapp/src/components/ConfusionMatrixJSONRender/index copy.jsx
+++ b/lab/webapp/src/components/ConfusionMatrixJSONRender/index copy.jsx
@@ -196,7 +196,7 @@ import d3 from 'd3';
// test version
class TestLineChart extends Component {
componentDidMount() {
- console.log("hello");
+
// train_sizes={train_sizes}
// train_scores={train_scores}
// test_scores={test_scores}
diff --git a/lab/webapp/src/components/ConfusionMatrixJSONRender/index.jsx b/lab/webapp/src/components/ConfusionMatrixJSONRender/index.jsx
index 20591da8e..b02e69762 100644
--- a/lab/webapp/src/components/ConfusionMatrixJSONRender/index.jsx
+++ b/lab/webapp/src/components/ConfusionMatrixJSONRender/index.jsx
@@ -198,7 +198,6 @@ import d3 from 'd3';
// test version
class ConfusionMatrixJSONRender extends Component {
componentDidMount() {
- console.log("hello");
const { cnf_data,chartKey, chartColor, min, max } = this.props;
this.renderChart(cnf_data, chartKey, chartColor, min, max);
diff --git a/lab/webapp/src/components/Datasets/components/DatasetCard/index.jsx b/lab/webapp/src/components/Datasets/components/DatasetCard/index.jsx
index c861fc013..d6c69ff02 100644
--- a/lab/webapp/src/components/Datasets/components/DatasetCard/index.jsx
+++ b/lab/webapp/src/components/Datasets/components/DatasetCard/index.jsx
@@ -45,7 +45,7 @@ const DatasetCard = ({ dataset, recommender, toggleAI }) => {
icon_type = "line graph";
}
- console.log("DatasetCard refresh");
+
if (document.getElementById("aiTooglePopup") == null && document.getElementById("aiTooglePopupready") != null) {
diff --git a/lab/webapp/src/components/Datasets/index.jsx b/lab/webapp/src/components/Datasets/index.jsx
index 0c5b6cdec..8808a7a90 100644
--- a/lab/webapp/src/components/Datasets/index.jsx
+++ b/lab/webapp/src/components/Datasets/index.jsx
@@ -70,22 +70,6 @@ class Datasets extends Component {
const { datasets, recommender, isFetching, error, fetchDatasets, fetchRecommender } = this.props;
-
-
-
-
- // React.createElement('div', null, `Hello ${this.props.toWhat}`);
- console.log("Hello!!!!!!!!!!!!!")
-
-
-
-
-
-
-
-
-
-
if(isFetching) {
return (
@@ -105,8 +89,6 @@ class Datasets extends Component {
if (document.getElementById("aiTooglePopupready") == null) {
- console.log("aiTooglePopupready!!!")
-
// create a new div element id with aiTooglePopupready
var temp=document.createElement("div");
document.body.appendChild(temp);
diff --git a/lab/webapp/src/components/Results/components/ImportanceScoreJSON/index.jsx b/lab/webapp/src/components/Results/components/ImportanceScoreJSON/index.jsx
index 8a997b2f2..43b3b0508 100644
--- a/lab/webapp/src/components/Results/components/ImportanceScoreJSON/index.jsx
+++ b/lab/webapp/src/components/Results/components/ImportanceScoreJSON/index.jsx
@@ -80,22 +80,6 @@ function NoScore({ scoreName, scoreValueList, featureList, chartKey, chartColor,
return (
- //
-
-
- //
- );
- } else if (scoreValueList && type == "pearsonr") {
+ } else if (scoreValueList && type == "regression") {
+
+
+
return (
-
+
+
+
+
+
);
- }
+
+ }
};
diff --git a/lab/webapp/src/components/Results/components/LearningCurveJSON/index.jsx b/lab/webapp/src/components/Results/components/LearningCurveJSON/index.jsx
index a71fab5c3..d2edbe329 100644
--- a/lab/webapp/src/components/Results/components/LearningCurveJSON/index.jsx
+++ b/lab/webapp/src/components/Results/components/LearningCurveJSON/index.jsx
@@ -58,18 +58,7 @@ function foldcheck(fold) {
function NoScore({ scoreName, train_sizes, train_scores, test_scores, chartKey, chartColor, type }) {
const getCardContent = () => {
- // if(typeof(scoreValue) !== 'number' && !scoreValueList.length)
- // {
- // if (scoreName.includes('AUC') ) {
- // return (
- //
- // );
- // } else {
- // return (
- //
- // );
- // }
- // }
+
if (train_sizes && train_scores && test_scores && type == "classification") {
diff --git a/lab/webapp/src/components/Results/components/PCAJSON/index.jsx b/lab/webapp/src/components/Results/components/PCAJSON/index.jsx
index 9e66ac8ad..622a32767 100644
--- a/lab/webapp/src/components/Results/components/PCAJSON/index.jsx
+++ b/lab/webapp/src/components/Results/components/PCAJSON/index.jsx
@@ -83,27 +83,12 @@ function foldcheck(fold) {
if (Points && Labels && type == "classification") {
- console.log("i am here in no score");
- console.log('labels', Labels);
+
+
return (
- //
-
- //
// Points, Labels, chartKey, chartColor, type
@@ -118,27 +103,7 @@ function foldcheck(fold) {
);
}
- // else if (LabelsList && type == "r2_or_vaf") {
- // return (
- //
- // );
- // } else if (LabelsList && type == "pearsonr") {
- // return (
- //
- // );
- // }
+
};
diff --git a/lab/webapp/src/components/Results/index.jsx b/lab/webapp/src/components/Results/index.jsx
index a03bd0f1f..c99fc5e8c 100644
--- a/lab/webapp/src/components/Results/index.jsx
+++ b/lab/webapp/src/components/Results/index.jsx
@@ -523,13 +523,31 @@ class Results extends Component {
finishTime={experiment.data.finished}
launchedBy={experiment.data.launched_by}
/>
-
+ {/* */}
+
+
+ {/* */}
+
+
,
document.getElementById('app')
diff --git a/machine/learn/skl_utils.py b/machine/learn/skl_utils.py
index 7c1edc57a..000bcf51f 100644
--- a/machine/learn/skl_utils.py
+++ b/machine/learn/skl_utils.py
@@ -42,7 +42,7 @@
import os
import matplotlib.pyplot as plt
from matplotlib import rcParams
-import shap
+import shap
import numpy as np
import pandas as pd
import matplotlib as mpl
@@ -53,8 +53,6 @@
from sklearn.model_selection import learning_curve
-
-
mpl.use('Agg')
# if system environment allows to export figures
@@ -78,10 +76,12 @@
# Number of samples used for SHAP Explainers
max_samples_kernel_explainer = 50
if 'MACHINE_SHAP_SAMPLES_KERNEL_EXPLAINER' in os.environ:
- max_samples_kernel_explainer = int(os.environ['MACHINE_SHAP_SAMPLES_KERNEL_EXPLAINER'])
+ max_samples_kernel_explainer = int(
+ os.environ['MACHINE_SHAP_SAMPLES_KERNEL_EXPLAINER'])
max_samples_other_explainer = 100
if 'MACHINE_SHAP_SAMPLES_OTHER_EXPLAINER' in os.environ:
- max_samples_other_explainer = int(os.environ['MACHINE_SHAP_SAMPLES_OTHER_EXPLAINER'])
+ max_samples_other_explainer = int(
+ os.environ['MACHINE_SHAP_SAMPLES_OTHER_EXPLAINER'])
def balanced_accuracy(y_true, y_pred):
@@ -147,6 +147,7 @@ def pearsonr(y_true, y_pred):
SCORERS['balanced_accuracy'] = make_scorer(balanced_accuracy)
SCORERS['pearsonr'] = make_scorer(pearsonr)
+
def get_column_names_from_ColumnTransformer(column_transformer, feature_names):
"""Get column names after applying Column Transformations
@@ -160,12 +161,12 @@ def get_column_names_from_ColumnTransformer(column_transformer, feature_names):
-------
new_feature_names: list of strings
Feature names generated after column transformations
- """
+ """
new_feature_names = []
for transformer_in_columns in column_transformer.transformers_:
- _, transformer, col_indices = transformer_in_columns
+ _, transformer, col_indices = transformer_in_columns
feature_columns = [feature_names[i] for i in col_indices]
- try: ## Only works for OneHotEncoder transforms
+ try: # Only works for OneHotEncoder transforms
names = transformer.get_feature_names(feature_columns)
new_feature_names += list(names)
except:
@@ -245,22 +246,20 @@ def generate_results(model, input_data,
num_classes = input_data[target_name].unique().shape[0]
features = input_data.drop(target_name, axis=1).values
target = input_data[target_name].values
-
- target_arr = np.array(target)
- len_target=len(target_arr)
+
+ target_arr = np.array(target)
+ len_target = len(target_arr)
classes = list(set(target))
- class_perc={}
+ class_perc = {}
for OneClass in classes:
- occ = np.count_nonzero(target_arr==OneClass)
- class_perc["class_"+str(OneClass)]= occ/len_target
-
-
- # show percentage of each class
+ occ = np.count_nonzero(target_arr == OneClass)
+ class_perc["class_"+str(OneClass)] = occ/len_target
+
+ # show percentage of each class
save_json_fmt(outdir=tmpdir, _id=_id,
fname="class_percentage.json", content=class_perc)
-
features, target = check_X_y(
features, target, dtype=None, order="C", force_all_finite=True)
@@ -302,9 +301,9 @@ def generate_results(model, input_data,
model = make_pipeline(ct, model)
scores = {}
- #print('Args used in model:', model.get_params())
+ # print('Args used in model:', model.get_params())
if mode == "classification":
- if(num_classes > 2):
+ if (num_classes > 2):
scoring = ["balanced_accuracy",
"precision_macro",
"recall_macro",
@@ -312,7 +311,7 @@ def generate_results(model, input_data,
scores['roc_auc_score'] = 'not supported for multiclass'
scores['train_roc_auc_score'] = 'not supported for multiclass'
else:
-
+
# https://en.wikipedia.org/wiki/Confusion_matrix
# scoring = ["balanced_accuracy",
# "precision",
@@ -325,7 +324,6 @@ def generate_results(model, input_data,
"f1",
"roc_auc"]
-
metric = "accuracy"
else:
scoring = ["r2",
@@ -339,7 +337,7 @@ def generate_results(model, input_data,
warnings.simplefilter('ignore')
if param_grid:
print("param_grid")
-
+
if isinstance(model, Pipeline):
parameters = {}
for key, val in param_grid.items():
@@ -356,7 +354,7 @@ def generate_results(model, input_data,
verbose=0,
error_score=-float('inf'),
return_train_score=True)
-
+
clf.fit(features, target)
cv_results = clf.cv_results_
# rename params name from pipeline object
@@ -374,12 +372,10 @@ def generate_results(model, input_data,
model = clf.best_estimator_
else:
print("param_grid else")
- plot_learning_curve(tmpdir,_id, model,features,target,cv,return_times=True)
+ plot_learning_curve(tmpdir, _id, model, features,
+ target, cv, return_times=True)
model.fit(features, target)
-
-
-
# # plot learning curve
# plot_learning_curve(tmpdir,_id, model,features,target,cv,return_times=True)
@@ -393,8 +389,6 @@ def generate_results(model, input_data,
return_train_score=True,
return_estimator=True
)
-
-
for s in scoring:
train_scores = cv_scores['train_' + s]
@@ -439,40 +433,31 @@ def generate_results(model, input_data,
'feature_importance_type': imp_score_type
}
- print("feature_importances",feature_importances)
+ print("feature_importances", feature_importances)
- top_feature_importances={}
+ top_feature_importances = {}
# if size of feature_importances is greater than 10, then
# only top 10 features whose feature_importances are greater than 0 are returned
if len(feature_importances['feature_importances']) > 10:
for i in range(len(feature_importances['feature_importances'])):
if feature_importances['feature_importances'][i] >= 0:
- top_feature_importances[feature_importances['feature_names'][i]] = feature_importances['feature_importances'][i]
-
+ top_feature_importances[feature_importances['feature_names']
+ [i]] = feature_importances['feature_importances'][i]
+
# sort the dictionary in descending order of feature_importances
- top_feature_importances = dict(sorted(top_feature_importances.items(), key=lambda item: item[1], reverse=True))
+ top_feature_importances = dict(
+ sorted(top_feature_importances.items(), key=lambda item: item[1], reverse=True))
# get top 10 features
- top_feature_importances = dict(list(top_feature_importances.items())[:10])
+ top_feature_importances = dict(
+ list(top_feature_importances.items())[:10])
- # print("top_feature_importances",top_feature_importances)
-
- # add 'feature_importance_type': imp_score_type to top_feature_importances
- # top_feature_importances['feature_importance_type'] = imp_score_type
-
-
- # make top_feature_importances format like feature_importances
top_feature_importances = {
'feature_names': list(top_feature_importances.keys()),
'feature_importances': list(top_feature_importances.values()),
'feature_importance_type': imp_score_type
}
- # print("new_top_feature_importances",top_feature_importances)
-
feature_importances = top_feature_importances
-
-
-
save_json_fmt(
outdir=tmpdir,
@@ -502,35 +487,35 @@ def generate_results(model, input_data,
model.classes_,
cv_scores,
figure_export)
- # plot pca
-
- plot_pca_2d(tmpdir,_id,features,target)
+
+ plot_pca_2d(tmpdir, _id, features, target)
# plot_pca_3d(tmpdir,_id,features,target)
# plot_pca_3d_iris(tmpdir,_id,features,target)
- plot_tsne_2d(tmpdir,_id,features,target)
+ plot_tsne_2d(tmpdir, _id, features, target)
if type(model).__name__ == 'Pipeline':
step_names = [step[0] for step in model.steps]
column_transformer = model[step_names[0]]
classifier_model = model[step_names[1]]
- modified_feature_names = get_column_names_from_ColumnTransformer(column_transformer, feature_names)
+ modified_feature_names = get_column_names_from_ColumnTransformer(
+ column_transformer, feature_names)
modified_features = column_transformer.transform(features.copy())
plot_shap_analysis_curve(tmpdir,
- _id,
- classifier_model,
- modified_features,
- modified_feature_names,
- classifier_model.classes_,
- target)
+ _id,
+ classifier_model,
+ modified_features,
+ modified_feature_names,
+ classifier_model.classes_,
+ target)
else:
plot_shap_analysis_curve(tmpdir,
- _id,
- model,
- features.copy(), # Send a copy of features as it may get modified
- feature_names,
- model.classes_,
- target)
-
+ _id,
+ model,
+ features.copy(), # Send a copy of features as it may get modified
+ feature_names,
+ model.classes_,
+ target)
+
if num_classes == 2:
plot_roc_curve(
tmpdir,
@@ -540,8 +525,6 @@ def generate_results(model, input_data,
cv_scores,
figure_export)
-
-
else: # regression
if figure_export:
plot_cv_pred(tmpdir, _id, features, target, cv_scores)
@@ -806,9 +789,8 @@ def plot_shap_analysis_curve(
# convert target to Pandas Series type if not already
y_test = pd.Series(target)
-
# Sample 100 examples for Tree Explainer / Linear Explainer
- if model_name in ['decisiontreeclassifier','randomforestclassifier','logisticregression','linearsvc']:
+ if model_name in ['decisiontreeclassifier', 'randomforestclassifier', 'logisticregression', 'linearsvc']:
max_num_samples = max_samples_other_explainer
elif model_name == 'gradientboostingclassifier' and len(class_names) == 2:
max_num_samples = max_samples_other_explainer
@@ -816,7 +798,8 @@ def plot_shap_analysis_curve(
else:
max_num_samples = max_samples_kernel_explainer
num_samples = min(max_num_samples, len(features))
- sampled_row_indices = np.random.choice(features.shape[0], size=num_samples, replace=False)
+ sampled_row_indices = np.random.choice(
+ features.shape[0], size=num_samples, replace=False)
features = features[sampled_row_indices]
y_test = y_test[sampled_row_indices].reset_index(drop=True)
@@ -846,7 +829,8 @@ def plot_shap_analysis_curve(
# KernelExplainer
explainer = shap.KernelExplainer(model.predict_proba, features)
# l1_reg not set to 'auto' to subside warnings
- shap_values = explainer.shap_values(features, l1_reg='num_features(10)')
+ shap_values = explainer.shap_values(
+ features, l1_reg='num_features(10)')
expected_values = explainer.expected_value
# Generate predictions for the final features
@@ -860,15 +844,18 @@ def plot_shap_analysis_curve(
# Handle the case of multi-class SHAP outputs
if isinstance(shap_values, list):
for i, class_name in enumerate(class_names):
- save_path = tmpdir + _id + '/shap_summary_curve' + _id + '_' + str(class_name) + '_.png'
- examples_subset_index, misclassified = get_example_subset(y_predictions, y_test, i)
+ save_path = tmpdir + _id + '/shap_summary_curve' + \
+ _id + '_' + str(class_name) + '_.png'
+ examples_subset_index, misclassified = get_example_subset(
+ y_predictions, y_test, i)
combine_summary_decision_curve(
shap_values[i], expected_values[i], features, feature_names,
n_features, examples_subset_index, misclassified, link, save_path
)
else:
save_path = tmpdir + _id + '/shap_summary_curve' + _id + '_0_.png'
- examples_subset_index, misclassified = get_example_subset(y_predictions, y_test, class_names[1])
+ examples_subset_index, misclassified = get_example_subset(
+ y_predictions, y_test, class_names[1])
combine_summary_decision_curve(
shap_values, expected_values, features, feature_names,
n_features, examples_subset_index, misclassified, link, save_path
@@ -878,7 +865,7 @@ def plot_shap_analysis_curve(
shap_summary_dict = {
'shap_explainer': explainer.__class__.__name__,
'shap_num_samples': num_samples,
- #'shap_values': shap_values
+ # 'shap_values': shap_values
}
save_json_fmt(outdir=tmpdir, _id=_id,
@@ -938,7 +925,8 @@ def combine_summary_decision_curve(
shap_value[examples_subset_index],
features[examples_subset_index, :],
feature_names=list(feature_names),
- feature_display_range=slice(None, -(n_features + 1), -1),
+ feature_display_range=slice(
+ None, -(n_features + 1), -1),
ignore_warnings=True,
highlight=misclassified,
show=False,
@@ -988,7 +976,7 @@ def plot_roc_curve(tmpdir, _id, X, y, cv_scores, figure_export):
probas_ = est.predict_proba(X[test])[:, 1]
except AttributeError:
probas_ = est.decision_function(X[test])
- #print(SCORERS['roc_auc'](est, X[train], y[train]))
+ # print(SCORERS['roc_auc'](est, X[train], y[train]))
# Compute ROC curve and area the curve
classes_encoded = np.array(
@@ -1085,7 +1073,8 @@ def plot_imp_score(tmpdir, _id, coefs, feature_names, imp_score_type):
plt.close()
return top_features, indices
-def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
+
+def plot_learning_curve(tmpdir, _id, model, features, target, cv, return_times=True):
"""Make learning curve.
Parameters
@@ -1106,17 +1095,14 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
None
"""
-
-
-
features = np.array(features)
target = np.array(target)
target[target == -1] = 0
-
- train_sizes, train_scores, test_scores, fit_times, _ = learning_curve(model,features,target,None, np.linspace(0.1, 1.0, 5), cv,return_times=True)
-
+ train_sizes, train_scores, test_scores, fit_times, _ = learning_curve(
+ model, features, target, None, np.linspace(0.1, 1.0, 5), cv, return_times=True)
+
plt.xlabel("Training examples")
plt.ylabel("Score")
plt.ylim([-0.05, 1.05])
@@ -1128,30 +1114,26 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
plt.grid()
-
plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
train_scores_mean + train_scores_std, alpha=0.1,
color="r")
plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
test_scores_mean + test_scores_std, alpha=0.1, color="b")
+ plt.plot(train_sizes, np.mean(train_scores, axis=1),
+ 'r', label=r'Training score')
+ plt.plot(train_sizes, np.mean(test_scores, axis=1),
+ 'b', label=r'Cross-validation score')
-
- plt.plot(train_sizes,np.mean(train_scores,axis=1),'r',label=r'Training score')
- plt.plot(train_sizes,np.mean(test_scores,axis=1),'b',label=r'Cross-validation score')
-
plt.title('Learning curve')
-
+
plt.legend(loc='best')
plt.savefig(tmpdir + _id + '/learning_curve_' + _id + '.png')
-
plt.close()
-
-
if np.isnan(train_sizes.tolist()).all():
- #replace nan with -1
+ # replace nan with -1
train_sizes = np.nan_to_num(train_sizes, nan=-1)
if np.isnan(train_scores.tolist()).all():
# replace nan with -1
@@ -1161,16 +1143,16 @@ def plot_learning_curve(tmpdir,_id,model,features,target,cv,return_times=True):
test_scores = np.nan_to_num(test_scores, nan=-1)
learning_curve_dict = {
- 'train_sizes':train_sizes.tolist(),
+ 'train_sizes': train_sizes.tolist(),
'train_scores': train_scores.tolist(),
'test_scores': test_scores.tolist()
-
+
}
save_json_fmt(outdir=tmpdir, _id=_id,
fname="learning_curve.json", content=learning_curve_dict)
-def plot_pca_2d(tmpdir,_id,features,target):
+def plot_pca_2d(tmpdir, _id, features, target):
"""Make PCA on 2D.
Parameters
@@ -1179,7 +1161,7 @@ def plot_pca_2d(tmpdir,_id,features,target):
Temporary directory for saving 2d pca plot and json file
_id: string
Experiment ID in Aliro
-
+
features: np.darray/pd.DataFrame
Features in training dataset
target: np.darray/pd.DataFrame
@@ -1191,14 +1173,8 @@ def plot_pca_2d(tmpdir,_id,features,target):
"""
X = np.array(features)
y = np.array(target)
-
- print(set(y))
-
-
-
-
-
+ print(set(y))
plt.cla()
pca = decomposition.PCA(n_components=2)
@@ -1206,44 +1182,38 @@ def plot_pca_2d(tmpdir,_id,features,target):
pca.fit(X)
X = pca.transform(X)
-
-
num_classes = len(set(y))
# generate the number of colors equal to the number of classes
colors = plt.cm.Set1(np.linspace(0, 1, num_classes))
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=mcolors.ListedColormap(colors))
# plot the legend where the colors are mapped to the classes
- plt.legend(handles=[Patch(color=colors[i], label="class_"+str(i)) for i in range(num_classes)])
-
+ plt.legend(handles=[Patch(color=colors[i], label="class_"+str(i))
+ for i in range(num_classes)])
# write x axis as pc1 and y axis as pc2
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.savefig(tmpdir + _id + '/pca_' + _id + '.png')
- plt.close()
-
+ plt.close()
# save X and y to json file
pca_dict = {
+
'X_pca': X.tolist(),
'y_pca': y.tolist()
}
-
# save json file
save_json_fmt(outdir=tmpdir, _id=_id,
fname="pca-json.json", content=pca_dict)
-
-def plot_pca_3d(tmpdir,_id,features,target):
+def plot_pca_3d(tmpdir, _id, features, target):
# import numpy as np
# import matplotlib.pyplot as plt
-
-
# from sklearn import datasets
# np.random.seed(5)
@@ -1254,12 +1224,9 @@ def plot_pca_3d(tmpdir,_id,features,target):
y = np.array(target)
y[y == -1] = 0
-
# print(X)
# print(y)
-
-
fig = plt.figure(1, figsize=(4, 3))
plt.clf()
@@ -1267,7 +1234,6 @@ def plot_pca_3d(tmpdir,_id,features,target):
# ax = fig.add_subplot(111, projection="2d", elev=48, azim=134)
ax.set_position([0, 0, 0.95, 1])
-
plt.cla()
pca = decomposition.PCA(n_components=3)
# pca = decomposition.PCA(n_components=2)
@@ -1284,10 +1250,9 @@ def plot_pca_3d(tmpdir,_id,features,target):
# bbox=dict(alpha=0.5, edgecolor="w", facecolor="w"),
# )
-
- classes_y=list(set(y))
+ classes_y = list(set(y))
for each_classes_y in classes_y:
- name_label=(str(each_classes_y), each_classes_y)
+ name_label = (str(each_classes_y), each_classes_y)
name = name_label[0]
label = name_label[1]
ax.text3D(
@@ -1310,7 +1275,6 @@ def plot_pca_3d(tmpdir,_id,features,target):
# horizontalalignment="center",
# bbox=dict(alpha=0.5, edgecolor="w", facecolor="w"),
# )
-
# Reorder the labels to have colors matching the cluster results
# y = np.choose(y, [1, 2, 0]).astype(float)
@@ -1323,15 +1287,11 @@ def plot_pca_3d(tmpdir,_id,features,target):
# y = np.choose(y, [0, 1]).astype(float)
print(y)
- ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.nipy_spectral, edgecolor="k")
+ ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y,
+ cmap=plt.cm.nipy_spectral, edgecolor="k")
# ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y)
# show which color is which class label based on scatter plot
-
-
-
-
-
print("X")
print(X)
@@ -1348,76 +1308,7 @@ def plot_pca_3d(tmpdir,_id,features,target):
plt.close()
-
-def plot_pca_3d_iris(tmpdir,_id,features,target):
-
-
- # import numpy as np
- # import matplotlib.pyplot as plt
-
-
- from sklearn import decomposition
- from sklearn import datasets
-
- np.random.seed(5)
-
- iris = datasets.load_iris()
- X = iris.data
- y = iris.target
-
- # print(X)
-
- # print(y)
-
-
-
- # print(X)
- # print(y)
-
-
-
- fig = plt.figure(1, figsize=(4, 3))
- plt.clf()
-
- ax = fig.add_subplot(111, projection="3d", elev=48, azim=134)
- # ax = fig.add_subplot(111, projection="2d", elev=48, azim=134)
- ax.set_position([0, 0, 0.95, 1])
-
-
- plt.cla()
- pca = decomposition.PCA(n_components=3)
- # pca = decomposition.PCA(n_components=2)
- pca.fit(X)
- X = pca.transform(X)
-
- for name, label in [("Setosa", 0), ("Versicolour", 1), ("Virginica", 2)]:
- ax.text3D(
- X[y == label, 0].mean(),
- X[y == label, 1].mean() + 1.5,
- X[y == label, 2].mean(),
- name,
- horizontalalignment="center",
- bbox=dict(alpha=0.5, edgecolor="w", facecolor="w"),
- )
-
-
- # Reorder the labels to have colors matching the cluster results
- y = np.choose(y, [1, 2, 0]).astype(float)
- ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.nipy_spectral, edgecolor="k")
-
-
-
-
- ax.w_xaxis.set_ticklabels([])
- ax.w_yaxis.set_ticklabels([])
- ax.w_zaxis.set_ticklabels([])
-
- # plt.show()
- plt.savefig(tmpdir + _id + '/pca_' + _id + '.png')
- plt.close()
-
-def plot_tsne_2d(tmpdir,_id,features,target):
-
+def plot_tsne_2d(tmpdir, _id, features, target):
"""Make tsne on 2D.
Parameters
@@ -1426,7 +1317,7 @@ def plot_tsne_2d(tmpdir,_id,features,target):
Temporary directory for saving 2d t-sne plot and json file
_id: string
Experiment ID in Aliro
-
+
features: np.darray/pd.DataFrame
Features in training dataset
target: np.darray/pd.DataFrame
@@ -1438,49 +1329,37 @@ def plot_tsne_2d(tmpdir,_id,features,target):
None
"""
-
-
-
tsne = TSNE(n_components=2, verbose=1, random_state=123)
X_2d = tsne.fit_transform(features)
-
# version 2
num_classes = len(set(target))
# generate the number of colors equal to the number of classes
colors = plt.cm.Set1(np.linspace(0, 1, num_classes))
- plt.scatter(X_2d[:,0], X_2d[:,1], c=target, cmap=mcolors.ListedColormap(colors))
+ plt.scatter(X_2d[:, 0], X_2d[:, 1], c=target,
+ cmap=mcolors.ListedColormap(colors))
# plot the legend where the colors are mapped to the classes
- plt.legend(handles=[Patch(color=colors[i], label="class_"+str(i)) for i in range(num_classes)])
-
+ plt.legend(handles=[Patch(color=colors[i], label="class_"+str(i))
+ for i in range(num_classes)])
# write x axis as pc1 and y axis as pc2
plt.xlabel('comp-1')
plt.ylabel('comp-2')
-
-
# plt.show()
plt.savefig(tmpdir + _id + '/tsne_' + _id + '.png')
plt.close()
-
-
-
# save X and y to json file
tsne_dict = {
'X_tsne': X_2d.tolist(),
'y_tsne': target.tolist()
}
-
save_json_fmt(outdir=tmpdir, _id=_id,
fname="tsne-json.json", content=tsne_dict)
-
-
-
def plot_dot_plot(tmpdir, _id, features,
target,
@@ -1596,8 +1475,8 @@ def plot_cv_pred(tmpdir, _id, X, y, cv_scores):
ax.set_xlabel('Predicted Values')
ax.set_ylabel('Residuals')
ax.axhline(y=0.0,
- color="red",
- linestyle='dashed')
+ color="red",
+ linestyle='dashed')
plt.tight_layout()
plt.savefig(tmpdir + _id + '/reg_cv_resi_' + _id + '.png')
plt.close()
@@ -1610,14 +1489,27 @@ def plot_cv_pred(tmpdir, _id, X, y, cv_scores):
ax.set_title("Q-Q Plot for Normalized Residuals")
x = np.linspace(*ax.get_xlim())
ax.plot(x, x,
- color="red",
- linestyle='dashed')
+ color="red",
+ linestyle='dashed')
ax.set_xlabel('Theoretical Quantiles')
ax.set_ylabel('Ordered Normalized Residuals')
plt.tight_layout()
plt.savefig(tmpdir + _id + '/reg_cv_qq_' + _id + '.png')
plt.close()
+ reg_cv_pred_resi_qq = {
+ 'y': y.tolist(),
+ 'pred_y': pred_y.tolist(),
+ 'resi_y': resi_y.tolist(),
+
+ 'z': z.tolist(),
+ 'series1_zero': series1[0][0].tolist(),
+ 'series1_one': series1[0][1].tolist(),
+ }
+
+ save_json_fmt(outdir=tmpdir, _id=_id,
+ fname="reg_cv_pred_resi_qq.json", content=reg_cv_pred_resi_qq)
+
def export_model(tmpdir,
_id,
@@ -1677,7 +1569,7 @@ def generate_export_codes(
mode="classification",
random_state=42):
"""Generate all library import calls for use in stand alone python scripts.
-
+
Parameters
----------
pickle_file_name: string
@@ -1856,5 +1748,3 @@ def balanced_accuracy(y_true, y_pred):
"""
return exported_codes_1, exported_codes_2
-
-
diff --git a/raspberrypi/productpage/other_Versions/productpage_version_0/package copy.json b/raspberrypi/productpage/other_Versions/productpage_version_0/package copy.json
deleted file mode 100644
index 148be0a94..000000000
--- a/raspberrypi/productpage/other_Versions/productpage_version_0/package copy.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
- "name": "aliro_ed_album_template_product_rp_dep_electron",
- "version": "1.0.0",
- "description": "Develop Aliro Ed cross-platform application with Electron",
- "main": "index.js",
- "scripts": {
- "test": "echo \\\"Error: no test specified\\\" && exit 1"
- },
- "author": "Hyunjun Choi",
- "license": "GPL-3.0"
-}