From b4297b9c493780b8bc448ac1f1635e3793e46838 Mon Sep 17 00:00:00 2001
From: choi
Date: Tue, 28 Nov 2023 11:01:23 -0800
Subject: [PATCH 01/15] working on sidemenu
---
.../components/AlgorithmOptions/index.jsx | 43 +++++++---
lab/webapp/src/components/ChatGPT/SideMenu.js | 3 +
.../Datasets/components/DatasetCard/index.jsx | 81 +++----------------
3 files changed, 43 insertions(+), 84 deletions(-)
diff --git a/lab/webapp/src/components/Builder/components/AlgorithmOptions/index.jsx b/lab/webapp/src/components/Builder/components/AlgorithmOptions/index.jsx
index a89278a9d..598a017e1 100644
--- a/lab/webapp/src/components/Builder/components/AlgorithmOptions/index.jsx
+++ b/lab/webapp/src/components/Builder/components/AlgorithmOptions/index.jsx
@@ -49,16 +49,29 @@ function AlgorithmOptions({
function openTrueOrFalse_algorithm_popup() {
if (localStorage.getItem("algorithm-popup") == "true") {
- // if (document.getElementById("aiTooglePopup")!==null){
- // document.getElementById("aiTooglePopup").style.cssText = "display: block !important";
- // }
-
return false;
} else {
return true;
}
}
+  // Determine the OS type from the browser user agent. Mobile checks come first
+  // because iOS user agents contain "like Mac OS X" and Android ones contain "Linux".
+  function getOsType() {
+    const userAgent = navigator.userAgent.toLowerCase();
+
+    if (userAgent.includes("iphone") || userAgent.includes("ipad")) return "iOS";
+    if (userAgent.includes("android")) return "Android";
+    if (userAgent.includes("win")) return "Windows";
+    if (userAgent.includes("mac")) return "macOS";
+    if (userAgent.includes("linux")) {
+      if (userAgent.includes("raspberry")) return "Raspberry Pi OS";
+      return "Linux";
+    }
+
+    return "Unknown OS";
+  }
+
return (
@@ -99,14 +112,20 @@ function AlgorithmOptions({
content={
}
trigger={
diff --git a/lab/webapp/src/components/ChatGPT/SideMenu.js b/lab/webapp/src/components/ChatGPT/SideMenu.js
index 407cf6525..2c071fedb 100644
--- a/lab/webapp/src/components/ChatGPT/SideMenu.js
+++ b/lab/webapp/src/components/ChatGPT/SideMenu.js
@@ -4,6 +4,8 @@ import { ThemeContext } from "./context/ThemeContext";
import { AllContext } from "./context/AllContext";
+import { Icon } from "semantic-ui-react";
+
export default function SideMenu() {
const {
currentModel,
@@ -237,6 +239,7 @@ export default function SideMenu() {
.slice(3)
.forEach((node) => {
node.childNodes[1].style.display = "none";
+ //
node.childNodes[1].innerHTML = "đď¸";
});
}
diff --git a/lab/webapp/src/components/Datasets/components/DatasetCard/index.jsx b/lab/webapp/src/components/Datasets/components/DatasetCard/index.jsx
index dcc322fcd..3b5c84271 100644
--- a/lab/webapp/src/components/Datasets/components/DatasetCard/index.jsx
+++ b/lab/webapp/src/components/Datasets/components/DatasetCard/index.jsx
@@ -34,6 +34,7 @@ import * as actions from "data/datasets/dataset/actions";
import DatasetActions from "./components/DatasetActions";
import BestResult from "./components/BestResult";
import ExperimentStatus from "./components/ExperimentStatus";
+
import {
Grid,
Segment,
@@ -41,6 +42,7 @@ import {
Button,
Popup,
Message,
+ Icon,
} from "semantic-ui-react";
import { formatDataset, formatDatasetOuter } from "../../../../utils/formatter";
@@ -55,13 +57,6 @@ const DatasetCard = ({ dataset, recommender, toggleAI }) => {
icon_type = "line graph";
}
- // const [showGrid, setShowGrid] = useState(false);
-
- // const handleGridClick = () => {
- // console.log("handleGridClick");
- // setShowGrid(false);
- // };
-
function clickDatasetCardDelButton(e) {
let parent = e.target.closest(".dataset-card");
// console.log(parent);
@@ -278,11 +273,12 @@ const DatasetCard = ({ dataset, recommender, toggleAI }) => {
- đ
+ {/* use trash icon from semantic ui */}
+
@@ -435,11 +431,11 @@ const DatasetCard = ({ dataset, recommender, toggleAI }) => {
- đ
+
{
);
}
-
- // return (
- //
-
- //
- //
- // }
- // />
- //
- //
- //
- //
-
- //
- //
- //
-
- //
- // );
};
export { DatasetCard };
export default connect(null, actions)(DatasetCard);
-// get aiTooglePopup id
-// if aiTooglePopup is not null, then hide it
-// wait until document.getElementById("aiTooglePopup") loaded
-
setTimeout(function () {
if (document.getElementById("aiTooglePopup") != null) {
console.log("aiTooglePopup is not null");
@@ -538,11 +481,5 @@ setTimeout(function () {
// length of aiTooglePopups
var len = aiTooglePopups.length;
console.log(len);
-
- // if (aiTooglePopups != null) {
- // for (var i = 1; i < aiTooglePopups.length; i++) {
- // aiTooglePopups[i].style.cssText += ';display:none !important;';
- // }
- // }
}
}, 100);
From 59dd2fe428b77d4492bb4a75e09cea1ef869ee2e Mon Sep 17 00:00:00 2001
From: choi
Date: Tue, 28 Nov 2023 11:03:42 -0800
Subject: [PATCH 02/15] replace Aliro with AliroEd. Update footer text
"Developed by the Center for AI Research and Education (CAIRE) in the
Department of Computational Biomedicine at Cedars-Sinai Medical Center in Los
Angeles, California, USA."
---
raspberrypi/intropage/index.html | 22 ++-----------------
raspberrypi/intropage/infAndDownloadpage.html | 8 +++++--
raspberrypi/intropage/int_ml_01.html | 4 +++-
raspberrypi/intropage/int_ml_02.html | 5 +++--
.../intropage/int_ml_03_decision_tree.html | 4 +++-
5 files changed, 17 insertions(+), 26 deletions(-)
diff --git a/raspberrypi/intropage/index.html b/raspberrypi/intropage/index.html
index 7bccc0da7..2955bd33b 100644
--- a/raspberrypi/intropage/index.html
+++ b/raspberrypi/intropage/index.html
@@ -251,14 +251,14 @@
-
Are you ready to run Aliro?
+
Are you ready to run AliroEd?
When Raspberry Pi is ready to run AliroEd, the below button shows
- "Alio is ready to run". Please click the button to run Aliro On
+ "Alio is ready to run". Please click the button to run AliroEd On
Raspberry Pi 4
@@ -268,15 +268,6 @@
Are you ready to run Aliro?
-
-
❮
❯
@@ -389,15 +380,6 @@ AliroEd_3
-
diff --git a/raspberrypi/intropage/int_ml_01.html b/raspberrypi/intropage/int_ml_01.html
index c1759433a..180d00df8 100644
--- a/raspberrypi/intropage/int_ml_01.html
+++ b/raspberrypi/intropage/int_ml_01.html
@@ -461,7 +461,9 @@ Drawing boundaries
Back to top
-
+
diff --git a/raspberrypi/intropage/int_ml_02.html b/raspberrypi/intropage/int_ml_02.html
index ce55831b6..f9e630326 100644
--- a/raspberrypi/intropage/int_ml_02.html
+++ b/raspberrypi/intropage/int_ml_02.html
@@ -457,8 +457,9 @@
Date: Tue, 28 Nov 2023 11:38:16 -0800
Subject: [PATCH 03/15] fixed typo. updated the broken download link.
---
raspberrypi/intropage/index.html | 6 +++---
raspberrypi/intropage/infAndDownloadpage.html | 5 +++--
raspberrypi/intropage/int_ml_02.html | 5 +----
3 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/raspberrypi/intropage/index.html b/raspberrypi/intropage/index.html
index 2955bd33b..e974e0908 100644
--- a/raspberrypi/intropage/index.html
+++ b/raspberrypi/intropage/index.html
@@ -242,7 +242,7 @@ Welcome to the AliroEd. AliroEd is for users who are not familiar with machine learning. Users can experience machine learning tasks on Raspberry Pi 4 with AliroEd. If internet is available, Users can download the raspberry pi image from the AliroEd website .
Users can easily install and run AliroEd on Raspberry Pi 4 using this image. AliroEd and the Raspberry Pi image provided in this website, are developed by the Center for AI Research and Education (CAIRE) in the
Department of Computational Biomedicine at Cedars-Sinai Medical Center
in Los Angeles, California, USA.
diff --git a/raspberrypi/intropage/int_ml_02.html b/raspberrypi/intropage/int_ml_02.html
index f9e630326..435549dde 100644
--- a/raspberrypi/intropage/int_ml_02.html
+++ b/raspberrypi/intropage/int_ml_02.html
@@ -209,10 +209,7 @@
>
has the highest average of sepal-length.
-
- Please click the Y axis. It will show an example to put
- boundaries
-
+
Please click the Y axis. It will show an example to put
boundaries to distinguish classes of iris flowers. Then please
From 8f63ff5d6a1d25e8dd7fc830966bbb92d2f19c48 Mon Sep 17 00:00:00 2001
From: choi
Date: Fri, 8 Mar 2024 20:00:56 -0800
Subject: [PATCH 04/15] Temporary fix to handle NaN values
---
.../src/components/Results/index-twoside.jsx | 416 ++---
lab/webapp/src/components/Results/index.jsx | 2 +-
.../ResultsV2/components/Score/index.jsx | 2 +
lab/webapp/src/components/ResultsV2/index.jsx | 1645 +++++++++--------
machine/learn/skl_utils.py | 138 +-
5 files changed, 1172 insertions(+), 1031 deletions(-)
diff --git a/lab/webapp/src/components/Results/index-twoside.jsx b/lab/webapp/src/components/Results/index-twoside.jsx
index 6c31c04be..99ab33729 100644
--- a/lab/webapp/src/components/Results/index-twoside.jsx
+++ b/lab/webapp/src/components/Results/index-twoside.jsx
@@ -28,34 +28,34 @@ along with this program. If not, see .
(Autogenerated header, do not modify)
*/
-import React, { Component } from 'react';
-import { connect } from 'react-redux';
-import * as actions from 'data/experiments/selected/actions';
-import SceneHeader from '../SceneHeader';
-import FetchError from '../FetchError';
-import AlgorithmDetails from './components/AlgorithmDetails';
-import RunDetails from './components/RunDetails'
-import MSEMAEDetails from './components/MSEMAEDetails';;
-import ConfusionMatrix from './components/ConfusionMatrix';
-import ConfusionMatrixJSON from './components/ConfusionMatrixJSON';
-import ROCCurve from './components/ROCCurve';
-import ShapSummaryCurve from './components/ShapSummaryCurve';
-import ImportanceScore from './components/ImportanceScore';
-import ImportanceScoreJSON from './components/ImportanceScoreJSON';
-import LearningCurve from './components/LearningCurve';
-import LearningCurveJSON from './components/LearningCurveJSON';
-import TestChart from './components/TestChart';
-import PCA from './components/PCA';
-import PCAJSON from './components/PCAJSON';
-import TSNE from './components/TSNE';
-import TSNEJSON from './components/TSNEJSON';
-import RegFigure from './components/RegFigure';
-import Score from './components/Score';
-import NoScore from './components/NoScore';
-import { Header, Grid, Loader, Dropdown, Menu} from 'semantic-ui-react';
-import { formatDataset } from 'utils/formatter';
-import ClassRate from './components/ClassRate';
-import ChatGPT from '../ChatGPT';
+import React, { Component } from "react";
+import { connect } from "react-redux";
+import * as actions from "data/experiments/selected/actions";
+import SceneHeader from "../SceneHeader";
+import FetchError from "../FetchError";
+import AlgorithmDetails from "./components/AlgorithmDetails";
+import RunDetails from "./components/RunDetails";
+import MSEMAEDetails from "./components/MSEMAEDetails";
+import ConfusionMatrix from "./components/ConfusionMatrix";
+import ConfusionMatrixJSON from "./components/ConfusionMatrixJSON";
+import ROCCurve from "./components/ROCCurve";
+import ShapSummaryCurve from "./components/ShapSummaryCurve";
+import ImportanceScore from "./components/ImportanceScore";
+import ImportanceScoreJSON from "./components/ImportanceScoreJSON";
+import LearningCurve from "./components/LearningCurve";
+import LearningCurveJSON from "./components/LearningCurveJSON";
+import TestChart from "./components/TestChart";
+import PCA from "./components/PCA";
+import PCAJSON from "./components/PCAJSON";
+import TSNE from "./components/TSNE";
+import TSNEJSON from "./components/TSNEJSON";
+import RegFigure from "./components/RegFigure";
+import Score from "./components/Score";
+import NoScore from "./components/NoScore";
+import { Header, Grid, Loader, Dropdown, Menu } from "semantic-ui-react";
+import { formatDataset } from "utils/formatter";
+import ClassRate from "./components/ClassRate";
+import ChatGPT from "../ChatGPT";
class Results extends Component {
constructor(props) {
@@ -72,11 +72,11 @@ class Results extends Component {
}
/**
- * Basic helped method to create array containing [key,val] entries where
- * key - name of given score
- * value - actual score
- * passed to Score component which uses javascript library C3 to create graphic
- */
+ * Basic helped method to create array containing [key,val] entries where
+ * key - name of given score
+ * value - actual score
+ * passed to Score component which uses javascript library C3 to create graphic
+ */
// async getData(filename){
// const res = await fetch(filename);
@@ -90,22 +90,23 @@ class Results extends Component {
let expScores = experiment.data.scores;
// console.log("experiment.data")
- console.log("experiment.data",experiment.data)
+ console.log("experiment-777.data", experiment.data);
// console.log(experiment.data['class_1'][0])
// console.log(experiment.data['class_-1'][0])
- if(typeof(expScores) === 'object'){
- keyList.forEach(scoreKey => {
- if(expScores[scoreKey] && typeof expScores[scoreKey].toFixed === 'function'){
+ if (typeof expScores === "object") {
+ keyList.forEach((scoreKey) => {
+ if (
+ expScores[scoreKey] &&
+ typeof expScores[scoreKey].toFixed === "function"
+ ) {
let tempLabel;
- scoreKey.includes('train')
- ? tempLabel = 'Train (' + expScores[scoreKey].toFixed(2) + ')'
- : tempLabel = 'Test (' + expScores[scoreKey].toFixed(2) + ')';
- testList.push(
- [tempLabel, expScores[scoreKey]]
- );
+ scoreKey.includes("train")
+ ? (tempLabel = "Train (" + expScores[scoreKey].toFixed(2) + ")")
+ : (tempLabel = "Test (" + expScores[scoreKey].toFixed(2) + ")");
+ testList.push([tempLabel, expScores[scoreKey]]);
}
- });
+ });
}
return testList;
@@ -114,19 +115,15 @@ class Results extends Component {
render() {
const { experiment, fetchExperiment } = this.props;
- if(experiment.isFetching || !experiment.data) {
+ if (experiment.isFetching || !experiment.data) {
return (
);
}
- if(experiment.error === 'Failed to fetch') {
- return (
-
- );
- } else if(experiment.error) {
+ if (experiment.error === "Failed to fetch") {
+ return ;
+ } else if (experiment.error) {
return (
{
fetch(`/api/v1/experiments/${id}/model`)
- .then(response => {
- if(response.status >= 400) {
+ .then((response) => {
+ if (response.status >= 400) {
throw new Error(`${response.status}: ${response.statusText}`);
}
return response.json();
})
- .then(json => {
+ .then((json) => {
window.location = `/api/v1/files/${json._id}`;
});
};
const downloadScript = (id) => {
fetch(`/api/v1/experiments/${id}/script`)
- .then(response => {
- if(response.status >= 400) {
+ .then((response) => {
+ if (response.status >= 400) {
throw new Error(`${response.status}: ${response.statusText}`);
}
return response.json();
})
- .then(json => {
+ .then((json) => {
window.location = `/api/v1/files/${json._id}`;
});
};
// console.log(experiment.data.prediction_type)
// --- get lists of scores ---
- if(experiment.data.prediction_type == "classification") { // classification
+ if (experiment.data.prediction_type == "classification") {
+ // classification
- console.log("experiment.data", experiment.data)
+ console.log("experiment.data", experiment.data);
// console.log("X_pca", experiment.data.X_pca)
// console.log("y_pca", experiment.data.y_pca)
- let confusionMatrix, rocCurve, importanceScore, learningCurve, pca, pca_json, tsne, tsne_json, shap_explainer, shap_num_samples;
-
+ let confusionMatrix,
+ rocCurve,
+ importanceScore,
+ learningCurve,
+ pca,
+ pca_json,
+ tsne,
+ tsne_json,
+ shap_explainer,
+ shap_num_samples;
+
let shapSummaryCurveDict = {};
-
- experiment.data.experiment_files.forEach(file => {
+ experiment.data.experiment_files.forEach((file) => {
const filename = file.filename;
- console.log('filename',filename);
- if(filename.includes('confusion_matrix')) {
+ console.log("filename", filename);
+ if (filename.includes("confusion_matrix")) {
confusionMatrix = file;
- } else if(filename.includes('roc_curve')) {
+ } else if (filename.includes("roc_curve")) {
rocCurve = file;
- } else if(filename.includes('imp_score')) {
+ } else if (filename.includes("imp_score")) {
importanceScore = file;
- } else if(filename.includes('learning_curve')) {
+ } else if (filename.includes("learning_curve")) {
learningCurve = file;
-
-
- } else if(filename.includes('pca') && filename.includes('png')) {
+ } else if (filename.includes("pca") && filename.includes("png")) {
pca = file;
- console.log("pca", pca)
- } else if (filename.includes('pca-json')) {
- console.log("pca_json")
+ console.log("pca", pca);
+ } else if (filename.includes("pca-json")) {
+ console.log("pca_json");
pca_json = file;
- console.log("pca_json: ", pca_json)
- }
-
- else if(filename.includes('tsne') && filename.includes('png')) {
+ console.log("pca_json: ", pca_json);
+ } else if (filename.includes("tsne") && filename.includes("png")) {
tsne = file;
- console.log("tsne", tsne)
-
- }
- else if (filename.includes('tsne-json')) {
- console.log("tsne_json")
+ console.log("tsne", tsne);
+ } else if (filename.includes("tsne-json")) {
+ console.log("tsne_json");
tsne_json = file;
- console.log("tsne_json: ", tsne_json)
- }
-
- else if(filename.includes('shap_summary_curve')) {
- console.log("shap_summary_curve")
- let class_name = filename.split('_').slice(-2,-1);
+ console.log("tsne_json: ", tsne_json);
+ } else if (filename.includes("shap_summary_curve")) {
+ console.log("shap_summary_curve");
+ let class_name = filename.split("_").slice(-2, -1);
shapSummaryCurveDict[class_name] = file;
- shap_explainer=experiment.data.shap_explainer;
- shap_num_samples=experiment.data.shap_num_samples;
- }
- else if (filename.includes('shap_summary_json')) {
- console.log("shap_summary_json")
+ shap_explainer = experiment.data.shap_explainer;
+ shap_num_samples = experiment.data.shap_num_samples;
+ } else if (filename.includes("shap_summary_json")) {
+ console.log("shap_summary_json");
// shap_json = file;
// console.log("shap_json: ", shap_json)
}
-
});
// balanced accuracy
- let balancedAccKeys = ['train_balanced_accuracy_score', 'balanced_accuracy_score'];
+ let balancedAccKeys = [
+ "train_balanced_accuracy_score",
+ "balanced_accuracy_score",
+ ];
// precision scores
- let precisionKeys = ['train_precision_score', 'precision_score']
+ let precisionKeys = ["train_precision_score", "precision_score"];
// AUC
- let aucKeys = ['train_roc_auc_score', 'roc_auc_score'];
+ let aucKeys = ["train_roc_auc_score", "roc_auc_score"];
// f1 score
- let f1Keys = ['train_f1_score', 'f1_score'];
+ let f1Keys = ["train_f1_score", "f1_score"];
// recall
- let recallKeys = ['train_recall_score', 'recall_score'];
+ let recallKeys = ["train_recall_score", "recall_score"];
let balancedAccList = this.getGaugeArray(balancedAccKeys);
let precisionList = this.getGaugeArray(precisionKeys);
@@ -240,67 +239,62 @@ class Results extends Component {
let class_percentage = [];
// let pca_data = [];
-
-
-
- experiment.data.class_names.forEach(eachclass => {
-
- console.log('eachclass.toString()', eachclass.toString())
+ experiment.data.class_names.forEach((eachclass) => {
+ console.log("eachclass.toString()", eachclass.toString());
// if type of experiment.data['class_' + eachclass.toString()] === 'object'
- if ((typeof experiment.data['class_' + eachclass.toString()]) === 'object')
- {
- class_percentage.push(
- [eachclass.toString(), experiment.data['class_' + eachclass.toString()][0]]
- );
- console.log("experiment.data['class_1']", experiment.data['class_1'])
+ if (
+ typeof experiment.data["class_" + eachclass.toString()] === "object"
+ ) {
+ class_percentage.push([
+ eachclass.toString(),
+ experiment.data["class_" + eachclass.toString()][0],
+ ]);
+ console.log("experiment.data['class_1']", experiment.data["class_1"]);
+ } else {
+ class_percentage.push([
+ eachclass.toString(),
+ experiment.data["class_" + eachclass.toString()],
+ ]);
+ console.log("experiment.data['class_1']", experiment.data["class_1"]);
}
- else
- {
- class_percentage.push(
- [eachclass.toString(), experiment.data['class_' + eachclass.toString()]]
- );
- console.log("experiment.data['class_1']", experiment.data['class_1'])
- }
-
-
-
});
-
-
-
return (
-
-
-
+
- downloadModel(experiment.data._id)}
- />,
- downloadScript(experiment.data._id)}
- />
+ downloadModel(experiment.data._id)}
+ />
+ ,
+ downloadScript(experiment.data._id)}
+ />
@@ -308,8 +302,6 @@ class Results extends Component {
-
-
@@ -332,7 +324,8 @@ class Results extends Component {
type="classification"
/>
{/* */}
-
-
-
-
-
{/* */}
-
{/* */}
{/* This TestChart is for interactive and responsive confusion matrix */}
-
-
+
{/* https://en.wikipedia.org/wiki/Confusion_matrix */}
-
-
+
{/* GPT Space */}
-
+
-
@@ -454,74 +445,75 @@ class Results extends Component {
*/}
-
{/* GPT Space */}
-
-
-
-
+
-
-
-
);
- } else if(experiment.data.prediction_type == "regression") { // regression
+ } else if (experiment.data.prediction_type == "regression") {
+ // regression
let importanceScore, reg_cv_pred, reg_cv_resi, reg_cv_qq;
- experiment.data.experiment_files.forEach(file => {
+ experiment.data.experiment_files.forEach((file) => {
const filename = file.filename;
- if(filename.includes('imp_score')) {
+ if (filename.includes("imp_score")) {
importanceScore = file;
- } else if(filename.includes('reg_cv_pred')) {
+ } else if (filename.includes("reg_cv_pred")) {
reg_cv_pred = file;
- } else if(filename.includes('reg_cv_resi')) {
+ } else if (filename.includes("reg_cv_resi")) {
reg_cv_resi = file;
- } else if(filename.includes('reg_cv_qq')) {
+ } else if (filename.includes("reg_cv_qq")) {
reg_cv_qq = file;
}
-
});
// r2
- let R2Keys = ['train_r2_score', 'r2_score'];
+ let R2Keys = ["train_r2_score", "r2_score"];
// r
- let RKeys = ['train_pearsonr_score', 'pearsonr_score'];
+ let RKeys = ["train_pearsonr_score", "pearsonr_score"];
// r2
- let VAFKeys = ['train_explained_variance_score', 'explained_variance_score'];
+ let VAFKeys = [
+ "train_explained_variance_score",
+ "explained_variance_score",
+ ];
let R2List = this.getGaugeArray(R2Keys);
let RList = this.getGaugeArray(RKeys);
let VAFList = this.getGaugeArray(VAFKeys);
-
return (
-
+
- downloadModel(experiment.data._id)}
- />,
- downloadScript(experiment.data._id)}
- />
+ downloadModel(experiment.data._id)}
+ />
+ ,
+ downloadScript(experiment.data._id)}
+ />
@@ -531,8 +523,6 @@ class Results extends Component {
-
-
-
{/* */}
{/* */}
{/* */}
-
-
-
-
-
-
-
-
-
+
-
{/* GPT Space */}
-
+
@@ -626,11 +610,9 @@ class Results extends Component {
{/*
*/}
-
- {/* GPT Space */}
-
-
+ {/* GPT Space */}
+
);
}
@@ -638,7 +620,7 @@ class Results extends Component {
}
const mapStateToProps = (state) => ({
- experiment: state.experiments.selected
+ experiment: state.experiments.selected,
});
export { Results };
diff --git a/lab/webapp/src/components/Results/index.jsx b/lab/webapp/src/components/Results/index.jsx
index f0025df4e..bfbe9c03b 100644
--- a/lab/webapp/src/components/Results/index.jsx
+++ b/lab/webapp/src/components/Results/index.jsx
@@ -90,7 +90,7 @@ class Results extends Component {
let expScores = experiment.data.scores;
// console.log("experiment.data")
- console.log("experiment.data", experiment.data);
+ console.log("experiment-999.data", experiment.data);
// console.log(experiment.data['class_1'][0])
// console.log(experiment.data['class_-1'][0])
diff --git a/lab/webapp/src/components/ResultsV2/components/Score/index.jsx b/lab/webapp/src/components/ResultsV2/components/Score/index.jsx
index 5bb1edcc5..8c1678bb3 100644
--- a/lab/webapp/src/components/ResultsV2/components/Score/index.jsx
+++ b/lab/webapp/src/components/ResultsV2/components/Score/index.jsx
@@ -136,7 +136,9 @@ function Score({
if (typeof scoreValue !== "number" && !scoreValueList.length) {
return ;
} else {
+ console.log("777-scoreValueList", scoreValueList);
let fold = scoreValueList[0][1] / scoreValueList[1][1];
+ console.log("777-fold", fold);
var icons = foldcheck(fold);
let headericon = (
.
(Autogenerated header, do not modify)
*/
-import React, {Component} from 'react';
-import {connect} from 'react-redux';
-import * as actions from 'data/experiments/selected/actions';
-import SceneHeader from '../SceneHeader';
-import FetchError from '../FetchError';
-import AlgorithmDetails from './components/AlgorithmDetails';
-import RunDetails from './components/RunDetails'
-import MSEMAEDetails from './components/MSEMAEDetails';;
-import ConfusionMatrix from './components/ConfusionMatrix';
-import ConfusionMatrixJSON from './components/ConfusionMatrixJSON';
-import ROCCurve from './components/ROCCurve';
-import ShapSummaryCurve from './components/ShapSummaryCurve';
-import ImportanceScore from './components/ImportanceScore';
-import ImportanceScoreJSON from './components/ImportanceScoreJSON';
-import LearningCurve from './components/LearningCurve';
-import LearningCurveJSON from './components/LearningCurveJSON';
-import TestChart from './components/TestChart';
-import PCA from './components/PCA';
+import React, { Component } from "react";
+import { connect } from "react-redux";
+import * as actions from "data/experiments/selected/actions";
+import SceneHeader from "../SceneHeader";
+import FetchError from "../FetchError";
+import AlgorithmDetails from "./components/AlgorithmDetails";
+import RunDetails from "./components/RunDetails";
+import MSEMAEDetails from "./components/MSEMAEDetails";
+import ConfusionMatrix from "./components/ConfusionMatrix";
+import ConfusionMatrixJSON from "./components/ConfusionMatrixJSON";
+import ROCCurve from "./components/ROCCurve";
+import ShapSummaryCurve from "./components/ShapSummaryCurve";
+import ImportanceScore from "./components/ImportanceScore";
+import ImportanceScoreJSON from "./components/ImportanceScoreJSON";
+import LearningCurve from "./components/LearningCurve";
+import LearningCurveJSON from "./components/LearningCurveJSON";
+import TestChart from "./components/TestChart";
+import PCA from "./components/PCA";
// import PCAJSON from './components/PCAJSON';
-import GenPLOT from './components/GenPLOT';
+import GenPLOT from "./components/GenPLOT";
// import PCAJSONV from './components/PCAJSONV';
// import TSNE from './components/TSNE';
// import TSNEJSON from './components/TSNEJSON';
-import RegFigure from './components/RegFigure';
-import Score from './components/Score';
+import RegFigure from "./components/RegFigure";
+import Score from "./components/Score";
// import NoScore from './components/NoScore';
-import {Header, Grid, Loader, Dropdown, Menu} from 'semantic-ui-react';
-import {formatDataset} from 'utils/formatter';
-import ClassRate from './components/ClassRate';
-import ChatGPT from '../ChatGPT';
+import { Header, Grid, Loader, Dropdown, Menu } from "semantic-ui-react";
+import { formatDataset } from "utils/formatter";
+import ClassRate from "./components/ClassRate";
+import ChatGPT from "../ChatGPT";
function moveSlidermakeBlack(e) {
+ let block = document.getElementsByClassName("chartsbaseleft")[0];
+ let slider = document.getElementsByClassName("slider")[0];
+ let chatbox = document.getElementsByClassName("chatbaseright")[0];
+
+ if (block && slider) {
+ // console.log("block and slider exist");
+
+ slider.onmousedown = function dragMouseDown(e) {
+ // get width of window
+ let windowWidth = window.innerWidth;
+ // console.log("windowWidth", windowWidth);
+ let dragX = e.clientX;
+ // console.log("e.clientX", e.clientX);
+ document.onmousemove = function onMouseMove(e) {
+ // 0.2 --0.3 --0.4 --0.8 --
+
+ console.log("block.offsetWidth", block.offsetWidth);
+
+ // shift the result block to the right, and make the chatbox invisible
+ if (block.offsetWidth > 0.8 * windowWidth) {
+ console.log("range-bigger than 0.8");
+ block.style.width = windowWidth + "px";
+ dragX = e.clientX;
+ chatbox.style.visibility = "hidden";
+ }
-
-
- let block = document.getElementsByClassName("chartsbaseleft")[0];
- let slider = document.getElementsByClassName("slider")[0];
- let chatbox = document.getElementsByClassName("chatbaseright")[0];
-
-
- if (block && slider) {
-
- // console.log("block and slider exist");
-
- slider.onmousedown = function dragMouseDown(e) {
- // get width of window
- let windowWidth = window.innerWidth;
- // console.log("windowWidth", windowWidth);
- let dragX = e.clientX;
- // console.log("e.clientX", e.clientX);
- document.onmousemove = function onMouseMove(e) {
-
-
- // 0.2 --0.3 --0.4 --0.8 --
-
- console.log("block.offsetWidth", block.offsetWidth)
-
- // shift the result block to the right, and make the chatbox invisible
- if (block.offsetWidth > 0.8 * windowWidth) {
- console.log("range-bigger than 0.8")
- block.style.width = windowWidth + "px";
- dragX = e.clientX;
- chatbox.style.visibility = "hidden";
- }
-
- // shift the chatbox to the left, and make the result block invisible
- else if (block.offsetWidth < 0.2 * windowWidth) {
- console.log("range-smaller than 0.2")
- block.style.width = 0 + "px";
- block.style.visibility = "hidden";
- slider.style.visibility = "hidden";
- dragX = e.clientX;
-
-
- }
-
- // else
- // {
- // console.log("range-bigger than or equal to 0.5 and smaller than or equal to 0.8")
- // block.style.visibility = "block";
- // slider.style.visibility = "block";
-
- // // origin
- // block.style.width = block.offsetWidth + e.clientX - dragX + "px";
- // dragX = e.clientX;
-
-
-
- // }
-
-
-
- else if (block.offsetWidth >= 0.4 * windowWidth && block.offsetWidth <= 0.8 * windowWidth)
- {
- console.log("range-bigger than or equal to 0.4 and smaller than or equal to 0.8")
- block.style.visibility = "block";
- slider.style.visibility = "block";
-
- // origin
- block.style.width = block.offsetWidth + e.clientX - dragX + "px";
- dragX = e.clientX;
-
- // if e.target.parentElement.childNodes[0].childNodes[1] is not undefined
-
- if (e.target.parentElement.childNodes[0].childNodes[1].className === "ui stackable two column grid") {
-
- e.target.parentElement.childNodes[0].childNodes[1].className = "ui stackable three column grid"
- }
-
- else if (e.target.parentElement.childNodes[0].childNodes[1].className === "ui stackable one column grid") {
-
- e.target.parentElement.childNodes[0].childNodes[1].className = "ui stackable three column grid"
- }
- }
-
-
-
- else if (block.offsetWidth >= 0.3 * windowWidth && block.offsetWidth < 0.4 * windowWidth)
- {
- console.log("range-bigger than or equal to 0.3 and smaller than 0.4")
- block.style.visibility = "block";
- slider.style.visibility = "block";
-
- // origin
- block.style.width = block.offsetWidth + e.clientX - dragX + "px";
- dragX = e.clientX;
-
-
- if (e.target.parentElement.childNodes[0].childNodes[1].className === "ui stackable three column grid") {
-
- e.target.parentElement.childNodes[0].childNodes[1].className = "ui stackable two column grid"
- }
-
- else if (e.target.parentElement.childNodes[0].childNodes[1].className === "ui stackable one column grid") {
-
- e.target.parentElement.childNodes[0].childNodes[1].className = "ui stackable two column grid"
- }
- }
-
- else if (block.offsetWidth >= 0.2 * windowWidth && block.offsetWidth < 0.3 * windowWidth)
- {
- console.log("range-bigger than or equal to 0.2 and smaller than 0.3")
- block.style.visibility = "block";
- slider.style.visibility = "block";
-
- // origin
- block.style.width = block.offsetWidth + e.clientX - dragX + "px";
- dragX = e.clientX;
-
- if (e.target.parentElement.childNodes[0].childNodes[1].className === "ui stackable three column grid") {
-
- e.target.parentElement.childNodes[0].childNodes[1].className = "ui stackable one column grid"
- }
-
- else if (e.target.parentElement.childNodes[0].childNodes[1].className === "ui stackable two column grid") {
-
- e.target.parentElement.childNodes[0].childNodes[1].className = "ui stackable one column grid"
- }
- }
-
-
- }
- // remove mouse-move listener on mouse-up
- document.onmouseup = () => document.onmousemove = document.onmouseup = null;
+ // shift the chatbox to the left, and make the result block invisible
+ else if (block.offsetWidth < 0.2 * windowWidth) {
+ console.log("range-smaller than 0.2");
+ block.style.width = 0 + "px";
+ block.style.visibility = "hidden";
+ slider.style.visibility = "hidden";
+ dragX = e.clientX;
}
- }
+ // else
+ // {
+ // console.log("range-bigger than or equal to 0.5 and smaller than or equal to 0.8")
+ // block.style.visibility = "block";
+ // slider.style.visibility = "block";
+
+ // // origin
+ // block.style.width = block.offsetWidth + e.clientX - dragX + "px";
+ // dragX = e.clientX;
+
+ // }
+ else if (
+ block.offsetWidth >= 0.4 * windowWidth &&
+ block.offsetWidth <= 0.8 * windowWidth
+ ) {
+ console.log(
+ "range-bigger than or equal to 0.4 and smaller than or equal to 0.8"
+ );
+ block.style.visibility = "block";
+ slider.style.visibility = "block";
+
+ // origin
+ block.style.width = block.offsetWidth + e.clientX - dragX + "px";
+ dragX = e.clientX;
+
+ // if e.target.parentElement.childNodes[0].childNodes[1] is not undefined
+
+ if (
+ e.target.parentElement.childNodes[0].childNodes[1].className ===
+ "ui stackable two column grid"
+ ) {
+ e.target.parentElement.childNodes[0].childNodes[1].className =
+ "ui stackable three column grid";
+ } else if (
+ e.target.parentElement.childNodes[0].childNodes[1].className ===
+ "ui stackable one column grid"
+ ) {
+ e.target.parentElement.childNodes[0].childNodes[1].className =
+ "ui stackable three column grid";
+ }
+ } else if (
+ block.offsetWidth >= 0.3 * windowWidth &&
+ block.offsetWidth < 0.4 * windowWidth
+ ) {
+ console.log("range-bigger than or equal to 0.3 and smaller than 0.4");
+ block.style.visibility = "block";
+ slider.style.visibility = "block";
+
+ // origin
+ block.style.width = block.offsetWidth + e.clientX - dragX + "px";
+ dragX = e.clientX;
+
+ if (
+ e.target.parentElement.childNodes[0].childNodes[1].className ===
+ "ui stackable three column grid"
+ ) {
+ e.target.parentElement.childNodes[0].childNodes[1].className =
+ "ui stackable two column grid";
+ } else if (
+ e.target.parentElement.childNodes[0].childNodes[1].className ===
+ "ui stackable one column grid"
+ ) {
+ e.target.parentElement.childNodes[0].childNodes[1].className =
+ "ui stackable two column grid";
+ }
+ } else if (
+ block.offsetWidth >= 0.2 * windowWidth &&
+ block.offsetWidth < 0.3 * windowWidth
+ ) {
+ console.log("range-bigger than or equal to 0.2 and smaller than 0.3");
+ block.style.visibility = "block";
+ slider.style.visibility = "block";
+
+ // origin
+ block.style.width = block.offsetWidth + e.clientX - dragX + "px";
+ dragX = e.clientX;
+
+ if (
+ e.target.parentElement.childNodes[0].childNodes[1].className ===
+ "ui stackable three column grid"
+ ) {
+ e.target.parentElement.childNodes[0].childNodes[1].className =
+ "ui stackable one column grid";
+ } else if (
+ e.target.parentElement.childNodes[0].childNodes[1].className ===
+ "ui stackable two column grid"
+ ) {
+ e.target.parentElement.childNodes[0].childNodes[1].className =
+ "ui stackable one column grid";
+ }
+ }
+ };
+ // remove mouse-move listener on mouse-up
+ document.onmouseup = () =>
+ (document.onmousemove = document.onmouseup = null);
+ };
+ }
}
function makeOriginColor(e) {
- let slider = document.getElementsByClassName("slider")[0];
+ let slider = document.getElementsByClassName("slider")[0];
- // make slider color black
- slider.style.backgroundColor = "#1B1C1D;"
+ // make slider color black
+ slider.style.backgroundColor = "#1B1C1D;";
}
class Results extends Component {
- constructor(props) {
- super(props);
- this.getGaugeArray = this
- .getGaugeArray
- .bind(this);
+ constructor(props) {
+ super(props);
+ this.getGaugeArray = this.getGaugeArray.bind(this);
+ }
+
+ componentDidMount() {
+ this.props.fetchExperiment(this.props.params.id);
+ }
+
+ componentWillUnmount() {
+ this.props.clearExperiment();
+ }
+
+ /**
+ * Basic helped method to create array containing [key,val] entries where
+ * key - name of given score
+ * value - actual score
+ * passed to Score component which uses javascript library C3 to create graphic
+ */
+
+ getGaugeArray(keyList) {
+ const { experiment } = this.props;
+ let testList = [];
+ let expScores = experiment.data.scores;
+
+ if (typeof expScores === "object") {
+ keyList.forEach((scoreKey) => {
+ console.log("scoreKey", scoreKey);
+ // in case of 0 or false, it should satisfy the condition
+
+ if (
+ expScores.hasOwnProperty(scoreKey) &&
+ expScores[scoreKey] !== undefined &&
+ expScores[scoreKey] !== null &&
+ typeof expScores[scoreKey].toFixed === "function"
+ ) {
+ let tempLabel = scoreKey.includes("train")
+ ? `Train (${expScores[scoreKey].toFixed(2)})`
+ : `Test (${expScores[scoreKey].toFixed(2)})`;
+ console.log("555-scoreKey", scoreKey);
+ console.log("555-expScores[scoreKey]", expScores[scoreKey]);
+ testList.push([tempLabel, expScores[scoreKey]]);
+ }
+ });
}
- componentDidMount() {
- this
- .props
- .fetchExperiment(this.props.params.id);
- }
+ console.log("testList", testList);
- componentWillUnmount() {
- this
- .props
- .clearExperiment();
- }
+ return testList;
+ }
- /**
- * Basic helped method to create array containing [key,val] entries where
- * key - name of given score
- * value - actual score
- * passed to Score component which uses javascript library C3 to create graphic
- */
-
- getGaugeArray(keyList) {
- const {experiment} = this.props;
- let testList = [];
- let expScores = experiment.data.scores;
-
- // console.log("experiment.data")
- console.log("experiment.data", experiment.data)
- // console.log(experiment.data['class_1'][0])
- // console.log(experiment.data['class_-1'][0])
-
- if (typeof(expScores) === 'object') {
- keyList.forEach(scoreKey => {
- if (expScores[scoreKey] && typeof expScores[scoreKey].toFixed === 'function') {
- let tempLabel;
- scoreKey.includes('train')
- ? tempLabel = 'Train (' + expScores[scoreKey].toFixed(2) + ')'
- : tempLabel = 'Test (' + expScores[scoreKey].toFixed(2) + ')';
- testList.push([
- tempLabel, expScores[scoreKey]
- ]);
- }
- });
- }
+ render() {
+ const { experiment, fetchExperiment } = this.props;
- return testList;
+ if (experiment.isFetching || !experiment.data) {
+ return (
+
+ );
}
- render() {
- const {experiment, fetchExperiment} = this.props;
+ if (experiment.error === "Failed to fetch") {
+ return ;
+ } else if (experiment.error) {
+ return (
+ fetchExperiment()}
+ />
+ );
+ }
- if (experiment.isFetching || !experiment.data) {
- return (
-
- );
+ const downloadModel = (id) => {
+ // console.log("downloadModel_id",id)
+ fetch(`/api/v1/experiments/${id}/model`)
+ .then((response) => {
+ if (response.status >= 400) {
+ throw new Error(`${response.status}: ${response.statusText}`);
+ }
+ return response.json();
+ })
+ .then((json) => {
+ console.log("json", json);
+ window.location = `/api/v1/files/${json._id}`;
+ });
+ };
+
+ const downloadScript = (id) => {
+ fetch(`/api/v1/experiments/${id}/script`)
+ .then((response) => {
+ if (response.status >= 400) {
+ throw new Error(`${response.status}: ${response.statusText}`);
+ }
+ return response.json();
+ })
+ .then((json) => {
+ window.location = `/api/v1/files/${json._id}`;
+ });
+ };
+
+ // console.log(experiment.data.prediction_type) --- get lists of scores ---
+ if (experiment.data.prediction_type == "classification") {
+ // classification
+
+ console.log("experiment.data", experiment.data);
+ // console.log("X_pca", experiment.data.X_pca) console.log("y_pca",
+ // experiment.data.y_pca)
+
+ let confusionMatrix,
+ rocCurve,
+ importanceScore,
+ learningCurve,
+ pca,
+ pca_json,
+ tsne,
+ tsne_json,
+ shap_explainer,
+ shap_num_samples;
+
+ let shapSummaryCurveDict = {};
+
+ experiment.data.experiment_files.forEach(async (file) => {
+ const filename = file.filename;
+ console.log("filename-test", filename);
+ if (filename.includes("confusion_matrix")) {
+ confusionMatrix = file;
+ } else if (filename.includes("roc_curve")) {
+ rocCurve = file;
+ // save to local storage localStorage.setItem('rocCurve', rocCurve);
+ } else if (filename.includes("imp_score")) {
+ importanceScore = file;
+ } else if (filename.includes("learning_curve")) {
+ learningCurve = file;
+ } else if (filename.includes("pca") && filename.includes("png")) {
+ pca = file;
+ console.log("pca", pca);
+ } else if (filename.includes("pca-json")) {
+ console.log("pca_json");
+ pca_json = file;
+ } else if (filename.includes("tsne") && filename.includes("png")) {
+ tsne = file;
+ console.log("tsne", tsne);
+ } else if (filename.includes("tsne-json")) {
+ console.log("tsne_json");
+ tsne_json = file;
+ console.log("tsne_json: ", tsne_json);
+ } else if (filename.includes("shap_summary_curve")) {
+ console.log("shap_summary_curve");
+ let class_name = filename.split("_").slice(-2, -1);
+ shapSummaryCurveDict[class_name] = file;
+ shap_explainer = experiment.data.shap_explainer;
+ shap_num_samples = experiment.data.shap_num_samples;
+
+ // save to local storage localStorage.setItem( 'shapSummaryCurveDict',
+ // JSON.stringify(shapSummaryCurveDict) );
+ // localStorage.setItem('shap_explainer', shap_explainer);
+ // localStorage.setItem('shap_num_samples', shap_num_samples);
+ } else if (filename.includes("shap_summary_json")) {
+ console.log("shap_summary_json");
+ // shap_json = file; console.log("shap_json: ", shap_json)
}
-
- if (experiment.error === 'Failed to fetch') {
- return ( );
- } else if (experiment.error) {
- return (
- fetchExperiment()}/>
- );
+ });
+ // balanced accuracy
+ let balancedAccKeys = [
+ "train_balanced_accuracy_score",
+ "balanced_accuracy_score",
+ ];
+ // precision scores
+ let precisionKeys = ["train_precision_score", "precision_score"];
+ // AUC
+ let aucKeys = ["train_roc_auc_score", "roc_auc_score"];
+ // f1 score
+ let f1Keys = ["train_f1_score", "f1_score"];
+ // recall
+ let recallKeys = ["train_recall_score", "recall_score"];
+
+ let balancedAccList = this.getGaugeArray(balancedAccKeys);
+ let precisionList = this.getGaugeArray(precisionKeys);
+ let aucList = this.getGaugeArray(aucKeys);
+ let recallList = this.getGaugeArray(recallKeys);
+ let f1List = this.getGaugeArray(f1Keys);
+ let class_percentage = [];
+ // let pca_data = [];
+
+ experiment.data.class_names.forEach((eachclass) => {
+ console.log("eachclass.toString()", eachclass.toString());
+ // if type of experiment.data['class_' + eachclass.toString()] === 'object'
+ if (
+ typeof experiment.data["class_" + eachclass.toString()] === "object"
+ ) {
+ class_percentage.push([
+ eachclass.toString(),
+ experiment.data["class_" + eachclass.toString()][0],
+ ]);
+ console.log("experiment.data['class_1']", experiment.data["class_1"]);
+ } else {
+ class_percentage.push([
+ eachclass.toString(),
+ experiment.data["class_" + eachclass.toString()],
+ ]);
+ console.log("experiment.data['class_1']", experiment.data["class_1"]);
}
-
- const downloadModel = (id) => {
- // console.log("downloadModel_id",id)
- fetch(`/api/v1/experiments/${id}/model`)
- .then(response => {
- if (response.status >= 400) {
- throw new Error(`${response.status}: ${response.statusText}`);
- }
- return response.json();
- })
- .then(json => {
- console.log("json",json)
- window.location = `/api/v1/files/${json._id}`;
- });
- };
-
- const downloadScript = (id) => {
- fetch(`/api/v1/experiments/${id}/script`)
- .then(response => {
- if (response.status >= 400) {
- throw new Error(`${response.status}: ${response.statusText}`);
- }
- return response.json();
- })
- .then(json => {
- window.location = `/api/v1/files/${json._id}`;
- });
- };
-
- // console.log(experiment.data.prediction_type) --- get lists of scores ---
- if (experiment.data.prediction_type == "classification") { // classification
-
- console.log("experiment.data", experiment.data)
- // console.log("X_pca", experiment.data.X_pca) console.log("y_pca",
- // experiment.data.y_pca)
-
- let confusionMatrix,
- rocCurve,
- importanceScore,
- learningCurve,
- pca,
- pca_json,
- tsne,
- tsne_json,
- shap_explainer,
- shap_num_samples;
-
- let shapSummaryCurveDict = {};
-
- experiment
- .data
- .experiment_files
- .forEach(async file => {
- const filename = file.filename;
- console.log('filename-test', filename);
- if (filename.includes('confusion_matrix')) {
- confusionMatrix = file;
- } else if (filename.includes('roc_curve')) {
- rocCurve = file;
- // save to local storage localStorage.setItem('rocCurve', rocCurve);
- } else if (filename.includes('imp_score')) {
- importanceScore = file;
- } else if (filename.includes('learning_curve')) {
- learningCurve = file;
- } else if (filename.includes('pca') && filename.includes('png')) {
- pca = file;
- console.log("pca", pca)
- } else if (filename.includes('pca-json')) {
- console.log("pca_json")
- pca_json = file;
- } else if (filename.includes('tsne') && filename.includes('png')) {
- tsne = file;
- console.log("tsne", tsne)
- } else if (filename.includes('tsne-json')) {
- console.log("tsne_json")
- tsne_json = file;
- console.log("tsne_json: ", tsne_json)
- }
-
-
- else if (filename.includes('shap_summary_curve')) {
- console.log("shap_summary_curve")
- let class_name = filename
- .split('_')
- .slice(-2, -1);
- shapSummaryCurveDict[class_name] = file;
- shap_explainer = experiment.data.shap_explainer;
- shap_num_samples = experiment.data.shap_num_samples;
-
- // save to local storage localStorage.setItem( 'shapSummaryCurveDict',
- // JSON.stringify(shapSummaryCurveDict) );
- // localStorage.setItem('shap_explainer', shap_explainer);
- // localStorage.setItem('shap_num_samples', shap_num_samples);
-
- } else if (filename.includes('shap_summary_json')) {
- console.log("shap_summary_json")
- // shap_json = file; console.log("shap_json: ", shap_json)
- }
-
- });
- // balanced accuracy
- let balancedAccKeys = ['train_balanced_accuracy_score', 'balanced_accuracy_score'];
- // precision scores
- let precisionKeys = ['train_precision_score', 'precision_score']
- // AUC
- let aucKeys = ['train_roc_auc_score', 'roc_auc_score'];
- // f1 score
- let f1Keys = ['train_f1_score', 'f1_score'];
- // recall
- let recallKeys = ['train_recall_score', 'recall_score'];
-
- let balancedAccList = this.getGaugeArray(balancedAccKeys);
- let precisionList = this.getGaugeArray(precisionKeys);
- let aucList = this.getGaugeArray(aucKeys);
- let recallList = this.getGaugeArray(recallKeys);
- let f1List = this.getGaugeArray(f1Keys);
- let class_percentage = [];
- // let pca_data = [];
-
- experiment
- .data
- .class_names
- .forEach(eachclass => {
-
- console.log('eachclass.toString()', eachclass.toString())
- // if type of experiment.data['class_' + eachclass.toString()] === 'object'
- if ((typeof experiment.data['class_' + eachclass.toString()]) === 'object') {
- class_percentage.push([
- eachclass.toString(),
- experiment.data['class_' + eachclass.toString()][0]
- ]);
- console.log("experiment.data['class_1']", experiment.data['class_1'])
- } else {
- class_percentage.push([
- eachclass.toString(),
- experiment.data['class_' + eachclass.toString()]
- ]);
- console.log("experiment.data['class_1']", experiment.data['class_1'])
- }
-
- });
-
- // console.log('balancedAccList', balancedAccList) save to local storage
- // localStorage.setItem('balancedAccList', JSON.stringify(balancedAccList));
- // console.log('precisionList', precisionList) save to local storage
- // localStorage.setItem('precisionList', JSON.stringify(precisionList)); save
- // to local storage console.log('aucList', aucList)
- // localStorage.setItem('aucList', JSON.stringify(aucList)); save to local
- // storage console.log('recallList', recallList)
- // localStorage.setItem('recallList', JSON.stringify(recallList)); save to
- // local storage console.log('f1List', f1List) localStorage.setItem('f1List',
- // JSON.stringify(f1List)); save to local storage
- // console.log('class_percentage', class_percentage)
- // localStorage.setItem('class_percentage', JSON.stringify(class_percentage));
-
- return (
-
-
-
-
-
-
-
-
-
-
-
- downloadModel(experiment.data._id)}/>,
- downloadScript(experiment.data._id)}/>
-
-
-
-
-
-
-
-
-
-
-
- {/* */}
- {/* */}
-
- {/* */}
-
-
-
-
-
- {/*
+
+
+
+
+
+
+
+
+
+
+ downloadModel(experiment.data._id)}
+ />
+ ,
+ downloadScript(experiment.data._id)}
+ />
+
+
+
+
+
+
+
+
+
+
+
+ {" "}
+ {/* */}
+ {" "}
+ {/* */}
+
+ {/* */}
+
+
+
+ {/* */
- }
-
- {/* */}
- {/* This TestChart is for interactive and responsive confusion matrix */}
-
-
-
- {/* */}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- {/* onChange={moveSlidermakeBlack} */}
- ""
- ""
- ""
- ""
- ""
- ""
-
-
-
-
-
-
- //
- //
- //
- //
- // downloadModel(experiment.data._id)}/>, downloadScript(experiment.data._id)}/>
- //
- //
- // {/* */} {/* */} {/* */}
- // {/* */}
- // {/* */} {/* */ }
- // {/* */ } {/*
- // */} {/*
- // This TestChart is for interactive and responsive confusion matrix */}
- //
- //
- //
- // {/*
- // https://en.wikipedia.org/wiki/Confusion_matrix
- // */}
- // {/* GPT Space */} {/*
- // */ } {/* GPT Space */}
- );
- } else if (experiment.data.prediction_type == "regression") { // regression
- let importanceScore,
- reg_cv_pred,
- reg_cv_resi,
- reg_cv_qq,
- reg_cvp_png,
- reg_cvp_json,
- reg_cvr_png,
- reg_cvr_json,
- reg_qqnr_png,
- reg_qqnr_json;
-
- experiment
- .data
- .experiment_files
- .forEach(file => {
- const filename = file.filename;
- console.log("filename-regression", filename)
- if (filename.includes('imp_score')) {
- importanceScore = file;
- } else if (filename.includes('reg_cv_pred')) {
- reg_cv_pred = file;
- } else if (filename.includes('reg_cv_resi')) {
- reg_cv_resi = file;
- } else if (filename.includes('reg_cv_qq')) {
- reg_cv_qq = file;
- } else if (filename.includes('reg_cv_pred') && filename.includes('png') ) {
- reg_cvp_png = file;
- console.log("reg_cvp_png", reg_cvp_png)
- } else if (filename.includes('reg_cv_resi') && filename.includes('png')) {
- reg_cvr_png = file;
- console.log("reg_cvr_png", reg_cvr_png)
- } else if (filename.includes('reg_cv_qq') && filename.includes('png')) {
- reg_qqnr_png = file;
- }else if (filename.includes('reg_cvp') && filename.includes('json') ) {
- reg_cvp_json = file;
- console.log("reg_cvp_json", reg_cvp_json)
- } else if (filename.includes('reg_cvr') && filename.includes('json')) {
- reg_cvr_json = file;
- console.log("reg_cvr_json", reg_cvr_json)
- } else if (filename.includes('reg_qqnr') && filename.includes('json')) {
- reg_qqnr_json = file;
- console.log("reg_qqnr_json", reg_qqnr_json)
- }
-
- });
- // r2
- let R2Keys = ['train_r2_score', 'r2_score'];
- // r
- let RKeys = ['train_pearsonr_score', 'pearsonr_score'];
- // r2
- let VAFKeys = ['train_explained_variance_score', 'explained_variance_score'];
-
- let R2List = this.getGaugeArray(R2Keys);
- let RList = this.getGaugeArray(RKeys);
- let VAFList = this.getGaugeArray(VAFKeys);
-
- return (
-
-
-
-
-
-
-
-
-
-
-
-
- downloadModel(experiment.data._id)}/>,
- downloadScript(experiment.data._id)}/>
-
-
-
-
-
-
-
-
-
-
-
- {/* */}
-
-
-
-
- {/* */}
- {/* */}
- {/* */}
-
- {/* {
+ /> */}
+ {" "}
+ {/* */}
+ {/* This TestChart is for interactive and responsive confusion matrix */}
+
+
+
+ {/* */}
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {/* onChange={moveSlidermakeBlack} */}
+ ""
+ ""
+ ""
+ ""
+ ""
+ ""
+
+
+
+
+
+
+ //
+ //
+ //
+ //
+ // downloadModel(experiment.data._id)}/>, downloadScript(experiment.data._id)}/>
+ //
+ //
+ // {/* */} {/* */} {/* */}
+ // {/* */}
+ // {/* */} {/* */ }
+ // {/* */ } {/*
+ // */} {/*
+ // This TestChart is for interactive and responsive confusion matrix */}
+ //
+ //
+ //
+ // {/*
+ // https://en.wikipedia.org/wiki/Confusion_matrix
+ // */}
+ // {/* GPT Space */} {/*
+ // */ } {/* GPT Space */}
+ );
+ } else if (experiment.data.prediction_type == "regression") {
+ // regression
+ let importanceScore,
+ reg_cv_pred,
+ reg_cv_resi,
+ reg_cv_qq,
+ reg_cvp_png,
+ reg_cvp_json,
+ reg_cvr_png,
+ reg_cvr_json,
+ reg_qqnr_png,
+ reg_qqnr_json;
+
+ experiment.data.experiment_files.forEach((file) => {
+ const filename = file.filename;
+ console.log("filename-regression", filename);
+ if (filename.includes("imp_score")) {
+ importanceScore = file;
+ } else if (filename.includes("reg_cv_pred")) {
+ reg_cv_pred = file;
+ } else if (filename.includes("reg_cv_resi")) {
+ reg_cv_resi = file;
+ } else if (filename.includes("reg_cv_qq")) {
+ reg_cv_qq = file;
+ } else if (
+ filename.includes("reg_cv_pred") &&
+ filename.includes("png")
+ ) {
+ reg_cvp_png = file;
+ console.log("reg_cvp_png", reg_cvp_png);
+ } else if (
+ filename.includes("reg_cv_resi") &&
+ filename.includes("png")
+ ) {
+ reg_cvr_png = file;
+ console.log("reg_cvr_png", reg_cvr_png);
+ } else if (filename.includes("reg_cv_qq") && filename.includes("png")) {
+ reg_qqnr_png = file;
+ } else if (filename.includes("reg_cvp") && filename.includes("json")) {
+ reg_cvp_json = file;
+ console.log("reg_cvp_json", reg_cvp_json);
+ } else if (filename.includes("reg_cvr") && filename.includes("json")) {
+ reg_cvr_json = file;
+ console.log("reg_cvr_json", reg_cvr_json);
+ } else if (filename.includes("reg_qqnr") && filename.includes("json")) {
+ reg_qqnr_json = file;
+ console.log("reg_qqnr_json", reg_qqnr_json);
+ }
+ });
+ // r2
+ let R2Keys = ["train_r2_score", "r2_score"];
+ // r
+ let RKeys = ["train_pearsonr_score", "pearsonr_score"];
+ // r2
+ let VAFKeys = [
+ "train_explained_variance_score",
+ "explained_variance_score",
+ ];
+
+ let R2List = this.getGaugeArray(R2Keys);
+ let RList = this.getGaugeArray(RKeys);
+ let VAFList = this.getGaugeArray(VAFKeys);
+
+ return (
+
+
+
+
+
+
+
+
+
+
+
+ downloadModel(experiment.data._id)}
+ />
+ ,
+ downloadScript(experiment.data._id)}
+ />
+
+
+
+
+
+
+
+
+
+
+ {" "}
+ {/* */}
+
+
+
+ {/* */}
+ {/* */}
+ {/* */}
+
+ {/* {
experiment.data.CVP_2d === undefined
?
:
} */}
-
-
- {/*
+
+ {/* */
- }
-
- {/* {
+ data={experiment.data}/> */}
+
+ {/* {
experiment.data.CVR_2d === undefined
?
:
} */}
-
-
- {/* {
+
+
+ {/* {
experiment.data.QQNR_2d === undefined
?
:
} */}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- {/* onChange={moveSlidermakeBlack} */}
- ""
- ""
- ""
- ""
- ""
- ""
-
-
-
-
-
-
- );
- }
+
+
+
+
+
+
+
+
+
+
+
+
+ {/* onChange={moveSlidermakeBlack} */}
+ ""
+ ""
+ ""
+ ""
+ ""
+ ""
+
+
+
+
+
+ );
}
+ }
}
-const mapStateToProps = (state) => ({experiment: state.experiments.selected});
-
+const mapStateToProps = (state) => ({ experiment: state.experiments.selected });
-export {
- Results
-};
+export { Results };
export default connect(mapStateToProps, actions)(Results);
diff --git a/machine/learn/skl_utils.py b/machine/learn/skl_utils.py
index 8654ff96e..7a9ba8ccb 100644
--- a/machine/learn/skl_utils.py
+++ b/machine/learn/skl_utils.py
@@ -38,7 +38,7 @@
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, LabelEncoder
-from sklearn.model_selection import GridSearchCV, cross_validate, StratifiedKFold, KFold
+from sklearn.model_selection import GridSearchCV, cross_validate, StratifiedKFold, RepeatedStratifiedKFold, KFold
from sklearn.metrics import SCORERS, roc_curve, auc, make_scorer, confusion_matrix
import itertools
import json
@@ -175,6 +175,36 @@ def get_column_names_from_ColumnTransformer(column_transformer, feature_names):
new_feature_names += feature_columns
return new_feature_names
+# decision rule for cross validation 2, 3, 4, 5, 6, 7, 8, 9, 10
+def decision_rule_cv_based_on_classes(each_class):
+ """
+ Adjusts the number of cross-validation folds based on the class distribution.
+
+ Parameters
+ ----------
+ each_class : dict
+ A dictionary where keys are the classes and the values are the number of samples per class.
+
+ Returns
+ -------
+ cv : int
+ Adjusted number of cross-validation folds.
+ """
+ # Find the class with the minimum number of samples based on the class sample counts
+ min_samples = min(each_class.values())
+
+ # Calculate the number of classes
+ n_classes = len(each_class)
+
+ # Determine the appropriate number of cv folds based on the class with the minimum samples
+ if n_classes == 2:
+ # For binary classification, ensure at least one sample of each class is present in the folds, to the extent possible
+ n_split = min(max(2, min_samples), 10)
+ else:
+ # For multi-class, use more folds if possible to balance between classes
+ n_split = min(max(3, min_samples), 10)
+
+ return n_split
def generate_results(model, input_data,
tmpdir, _id, target_name='class',
@@ -246,6 +276,14 @@ def generate_results(model, input_data,
feature_names = np.array(
[x for x in input_data.columns.values if x != target_name])
num_classes = input_data[target_name].unique().shape[0]
+
+ # calculate number of each class
+ each_class = input_data[target_name].value_counts()
+ print("each_class", each_class)
+ print("num_classes", num_classes)
+ # Temporary fix to handle NaN values
+ # n_splits = decision_rule_cv_based_on_classes(each_class)
+ # cv = StratifiedKFold(n_splits=n_splits)
features = input_data.drop(target_name, axis=1).values
target = input_data[target_name].values
@@ -382,14 +420,27 @@ def generate_results(model, input_data,
# # plot learning curve
# plot_learning_curve(tmpdir,_id, model,features,target,cv,return_times=True)
-
+ # StratifiedKFold
+ # stratified_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
+ # Initialize RepeatedStratifiedKFold
+
+ # n_splits = 2
+ # n_repeats = 2
+ # stratified_cv = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=42)
+
+ # print("stratified_cv", stratified_cv)
# computing cross-validated metrics
+
+ # Temporary fix to handle NaN values
+ stratified_cv = StratifiedKFold(n_splits=8)
+
+
cv_scores = cross_validate(
estimator=model,
X=features,
y=target,
scoring=scoring,
- cv=cv,
+ cv = stratified_cv,
return_train_score=True,
return_estimator=True
)
@@ -398,26 +449,73 @@ def generate_results(model, input_data,
train_scores = cv_scores['train_' + s]
test_scores = cv_scores['test_' + s]
+ print("train_scores", train_scores)
+ print("test_scores", test_scores)
+
+ # if abs(train_scores.mean()) is np.nan OR abs(test_scores.mean()) is np.nan
+ if np.isnan(abs(train_scores.mean())) or np.isnan(abs(test_scores.mean())):
+ print("777-NaN")
+ print("train_scores", train_scores)
+ print("test_scores", test_scores)
+
# remove _macro
score_name = s.replace('_macro', '')
# make balanced_accuracy as default score
if score_name in ["balanced_accuracy", "neg_mean_squared_error"]:
scores['train_score'] = abs(train_scores.mean())
scores['test_score'] = abs(test_scores.mean())
+
+ # Temporary fix to handle NaN values
+ if np.isnan(scores['train_score']):
+ scores['train_score'] = np.nanmean(train_scores)
+ if np.isnan(scores['test_score']):
+ scores['test_score'] = np.nanmean(test_scores)
# for api will fix later
if score_name == "balanced_accuracy":
scores['accuracy_score'] = test_scores.mean()
+ # Temporary fix to handle NaN values
+ if np.nanmean(test_scores)!=np.nan:
+ scores['accuracy_score'] = np.nanmean(test_scores)
+ else:
+ scores['accuracy_score'] = 0
# for experiment tables
if score_name == "balanced_accuracy" or score_name == "r2":
scores['exp_table_score'] = test_scores.mean()
+ # Temporary fix to handle NaN values
+ if np.nanmean(test_scores)!=np.nan:
+ scores['exp_table_score'] = np.nanmean(test_scores)
+ else:
+ scores['exp_table_score'] = 0
if score_name in ["neg_mean_squared_error", "neg_mean_absolute_error"]:
scores['train_{}_score'.format(score_name)] = abs(
train_scores.mean())
+ # Temporary fix to handle NaN values
+ if np.nanmean(train_scores)!=np.nan:
+ scores['train_{}_score'.format(score_name)] = np.nanmean(
+ train_scores)
+ else:
+ scores['train_{}_score'.format(score_name)] = 0
scores['{}_score'.format(score_name)] = abs(test_scores.mean())
+ # Temporary fix to handle NaN values
+ if np.nanmean(test_scores)!=np.nan:
+ scores['{}_score'.format(score_name)] = np.nanmean(test_scores)
+ else:
+ scores['{}_score'.format(score_name)] = 0
else:
scores['train_{}_score'.format(score_name)] = train_scores.mean()
+ # Temporary fix to handle NaN values
+ if np.nanmean(train_scores)!=np.nan:
+ scores['train_{}_score'.format(score_name)] = np.nanmean(
+ train_scores)
+ else:
+ scores['train_{}_score'.format(score_name)] = 0
scores['{}_score'.format(score_name)] = test_scores.mean()
+ # Temporary fix to handle NaN values
+ if np.nanmean(test_scores)!=np.nan:
+ scores['{}_score'.format(score_name)] = np.nanmean(test_scores)
+ else:
+ scores['{}_score'.format(score_name)] = 0
# dump fitted module as pickle file
export_model(tmpdir, _id, model, filename, target_name, mode, random_state)
@@ -686,7 +784,9 @@ def plot_confusion_matrix(
None
"""
pred_y = np.empty(y.shape)
- cv = StratifiedKFold(n_splits=10)
+ # cv = StratifiedKFold(n_splits=10)
+ # Temporary fix to handle NaN values
+ cv = StratifiedKFold(n_splits=8)
for cv_split, est in zip(cv.split(X, y), cv_scores['estimator']):
train, test = cv_split
pred_y[test] = est.predict(X[test])
@@ -979,12 +1079,13 @@ def plot_roc_curve(tmpdir, _id, X, y, cv_scores, figure_export):
"""
from scipy import interp
from scipy.stats import sem, t
- cv = StratifiedKFold(n_splits=10)
+ # cv = StratifiedKFold(n_splits=10)
+ # Temporary fix to handle NaN values
+ cv = StratifiedKFold(n_splits=8)
tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)
- # print(cv_scores['train_roc_auc'])
for cv_split, est in zip(cv.split(X, y), cv_scores['estimator']):
train, test = cv_split
try:
@@ -998,8 +1099,16 @@ def plot_roc_curve(tmpdir, _id, X, y, cv_scores, figure_export):
[list(est.classes_).index(c)
for c in y[test]], dtype=np.int
)
+ # print("Each classes_encoded:", classes_encoded)
fpr, tpr, thresholds = roc_curve(classes_encoded, probas_)
+
+ # Temporary fix to handle NaN values
+ # When the given data is extremely unbalanced, as illustrated by the example where classes_encoded consists solely of the class 0, both true positives (TP) and false negatives (FN) are zero. Consequently, the true positive rate (TPR) is calculated as TPR = TP / (TP + FN), which results in an undefined value (NaN) due to division by zero. In the specific scenario provided, where roc_curve([0,0,0], [0,0.9,0]) is called, it highlights a situation with no positive instances present in the true labels. For purposes of data visualization or further analysis where a numerical value is required, this NaN value is replaced with 0 to indicate the absence of true positives under these conditions.
+ fpr = np.nan_to_num(fpr)
+ tpr = np.nan_to_num(tpr)
+
tprs.append(interp(mean_fpr, fpr, tpr))
+
tprs[-1][0] = 0.0
roc_auc = auc(fpr, tpr)
aucs.append(roc_auc)
@@ -1047,6 +1156,7 @@ def plot_roc_curve(tmpdir, _id, X, y, cv_scores, figure_export):
'tpr': mean_tpr.tolist(),
'roc_auc_score': mean_auc
}
+ print("roc_curve_dict:", roc_curve_dict)
file_name = 'roc_curve' + '.json'
save_json_fmt(outdir=tmpdir, _id=_id,
@@ -1159,6 +1269,18 @@ def plot_learning_curve(tmpdir, _id, model, features, target, cv, return_times=T
# replace nan with -1
test_scores = np.nan_to_num(test_scores, nan=-1)
+
+
+
+ # temp solution for nan values
+ train_sizes = np.nan_to_num(train_sizes, nan=-1)
+ train_scores = np.nan_to_num(train_scores, nan=-1)
+ test_scores = np.nan_to_num(test_scores, nan=-1)
+
+ print("train_sizes.tolist():", train_sizes.tolist())
+ print("train_scores.tolist():", train_scores.tolist())
+ print("test_scores.tolist():", test_scores.tolist())
+
learning_curve_dict = {
'train_sizes': train_sizes.tolist(),
'train_scores': train_scores.tolist(),
@@ -1240,13 +1362,11 @@ def plot_pca_3d(tmpdir, _id, features, target):
# np.random.seed(5)
# iris = datasets.load_iris()
- # print(features)
+
X = np.array(features)
y = np.array(target)
y[y == -1] = 0
- # print(X)
- # print(y)
fig = plt.figure(1, figsize=(4, 3))
plt.clf()
From 6a37ab8341e84611287d7ee832576d1516a32cb0 Mon Sep 17 00:00:00 2001
From: choi
Date: Tue, 12 Mar 2024 11:54:06 -0700
Subject: [PATCH 05/15] Addressed issues with an extremely imbalanced and small
dataset by removing NaN values from the metrics. (This is a temporary fix.)
---
machine/learn/skl_utils.py | 52 +++++++++++++-------------------------
1 file changed, 18 insertions(+), 34 deletions(-)
diff --git a/machine/learn/skl_utils.py b/machine/learn/skl_utils.py
index 7a9ba8ccb..45d6ef996 100644
--- a/machine/learn/skl_utils.py
+++ b/machine/learn/skl_utils.py
@@ -175,8 +175,8 @@ def get_column_names_from_ColumnTransformer(column_transformer, feature_names):
new_feature_names += feature_columns
return new_feature_names
-# decision rule for cross validation 2, 3, 4, 5, 6, 7, 8, 9, 10
-def decision_rule_cv_based_on_classes(each_class):
+# decision rule for choosing number of folds based on the class distribution in the given dataset
+def decision_rule_fold_cv_based_on_classes(each_class):
"""
Adjusts the number of cross-validation folds based on the class distribution.
@@ -188,23 +188,19 @@ def decision_rule_cv_based_on_classes(each_class):
Returns
-------
cv : int
- Adjusted number of cross-validation folds.
+ The suitable number of cross-validation folds ensuring that each fold can include instances of each class.
"""
- # Find the class with the minimum number of samples based on the class sample counts
- min_samples = min(each_class.values())
+ # Find the minimum class count to ensure every fold can contain at least one instance of every class.
+ min_class_count = min(each_class.values())
- # Calculate the number of classes
- n_classes = len(each_class)
+ # The maximum number of folds is determined by the smallest class to ensure representation in each fold.
+ # However, we cannot have more folds than the minimum class count.
+ n_folds = min(10, min_class_count) # Starting with a default max of 10 folds
- # Determine the appropriate number of cv folds based on the class with the minimum samples
- if n_classes == 2:
- # For binary classification, ensure at least one sample of each class is present in the folds, to the extent possible
- n_split = min(max(2, min_samples), 10)
- else:
- # For multi-class, use more folds if possible to balance between classes
- n_split = min(max(3, min_samples), 10)
+ # Ensure at least 2 folds for meaningful cross-validation.
+ n_folds = max(n_folds, 2)
- return n_split
+ return n_folds
def generate_results(model, input_data,
tmpdir, _id, target_name='class',
@@ -418,21 +414,8 @@ def generate_results(model, input_data,
target, cv, return_times=True)
model.fit(features, target)
- # # plot learning curve
- # plot_learning_curve(tmpdir,_id, model,features,target,cv,return_times=True)
- # StratifiedKFold
- # stratified_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
- # Initialize RepeatedStratifiedKFold
+
- # n_splits = 2
- # n_repeats = 2
- # stratified_cv = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=42)
-
- # print("stratified_cv", stratified_cv)
- # computing cross-validated metrics
-
- # Temporary fix to handle NaN values
- stratified_cv = StratifiedKFold(n_splits=8)
cv_scores = cross_validate(
@@ -440,7 +423,8 @@ def generate_results(model, input_data,
X=features,
y=target,
scoring=scoring,
- cv = stratified_cv,
+ # cv = stratified_cv,
+ cv = cv,
return_train_score=True,
return_estimator=True
)
@@ -784,9 +768,9 @@ def plot_confusion_matrix(
None
"""
pred_y = np.empty(y.shape)
- # cv = StratifiedKFold(n_splits=10)
+ cv = StratifiedKFold(n_splits=10)
# Temporary fix to handle NaN values
- cv = StratifiedKFold(n_splits=8)
+ # cv = StratifiedKFold(n_splits=8)
for cv_split, est in zip(cv.split(X, y), cv_scores['estimator']):
train, test = cv_split
pred_y[test] = est.predict(X[test])
@@ -1079,9 +1063,9 @@ def plot_roc_curve(tmpdir, _id, X, y, cv_scores, figure_export):
"""
from scipy import interp
from scipy.stats import sem, t
- # cv = StratifiedKFold(n_splits=10)
+ cv = StratifiedKFold(n_splits=10)
# Temporary fix to handle NaN values
- cv = StratifiedKFold(n_splits=8)
+ # cv = StratifiedKFold(n_splits=8)
tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)
From ac06865ea64cd64146f970eacdff48f9c164b166 Mon Sep 17 00:00:00 2001
From: choi
Date: Tue, 12 Mar 2024 12:03:07 -0700
Subject: [PATCH 06/15] remove unnecessary comments
---
machine/learn/skl_utils.py | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/machine/learn/skl_utils.py b/machine/learn/skl_utils.py
index 45d6ef996..49afe5b80 100644
--- a/machine/learn/skl_utils.py
+++ b/machine/learn/skl_utils.py
@@ -275,11 +275,7 @@ def generate_results(model, input_data,
# calculate number of each class
each_class = input_data[target_name].value_counts()
- print("each_class", each_class)
- print("num_classes", num_classes)
- # Temporary fix to handle NaN values
- # n_splits = decision_rule_cv_based_on_classes(each_class)
- # cv = StratifiedKFold(n_splits=n_splits)
+
features = input_data.drop(target_name, axis=1).values
target = input_data[target_name].values
From 859ef68c5b3d8e4604ec4dc7a89a8185d85972ef Mon Sep 17 00:00:00 2001
From: choi
Date: Fri, 15 Mar 2024 10:50:02 -0700
Subject: [PATCH 07/15] remove unnecessary comments
---
machine/learn/driver.py | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/machine/learn/driver.py b/machine/learn/driver.py
index bf4af9489..59f8980d1 100644
--- a/machine/learn/driver.py
+++ b/machine/learn/driver.py
@@ -92,15 +92,3 @@ def main(args, param_grid={}):
args, param_grid = parse_args()
main(args, param_grid)
-
-
-
-
-
-
- # # args
- # args= {'method': 'DecisionTreeClassifier', '_id': '631a1ca11b74ba0031813fbd', 'grid_search': False, 'criterion': 'gini', 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 1, 'min_weight_fraction_leaf': 0.0, 'max_features': None}
- # # param_grid
- # param_grid = {'n_estimators': [100], 'learning_rate': [0.01, 0.1, 1.0], 'max_depth': [1, 3, 5, 10], 'min_child_weight': [1, 3, 5, 10, 20], 'subsample': [0.5, 1.0]}
-
- # main(args, param_grid)
From befeafc1e399b62fc0f26d17e08a675db8c0429c Mon Sep 17 00:00:00 2001
From: choi
Date: Fri, 15 Mar 2024 10:51:33 -0700
Subject: [PATCH 08/15] update gitignore
---
.gitignore | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/.gitignore b/.gitignore
index a6a17b7bd..5e06cf6b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -66,4 +66,5 @@ MANIFEST
package-lock.json
package.json
package-copy.json
-machine/code_runs/
\ No newline at end of file
+machine/code_runs/
+machine/test_trained_models/
\ No newline at end of file
From 12520082de02b2bb8d8082e8cb3c4c6fafe737ab Mon Sep 17 00:00:00 2001
From: choi
Date: Fri, 15 Mar 2024 10:52:11 -0700
Subject: [PATCH 09/15] add GaussianNB, Xgboost, MLP
---
docker/dbmongo/files/projects.json | 1912 +++++++++++++++-------------
docker/dbmongo/files/users.json | 99 +-
2 files changed, 1064 insertions(+), 947 deletions(-)
diff --git a/docker/dbmongo/files/projects.json b/docker/dbmongo/files/projects.json
index fbf610bb7..469f8b857 100644
--- a/docker/dbmongo/files/projects.json
+++ b/docker/dbmongo/files/projects.json
@@ -1,1092 +1,1194 @@
-[{
+[
+ {
"name": "BernoulliNB",
"path": "sklearn.naive_bayes",
"categorical_encoding_strategy": "OneHotEncoder",
"description": "Naive Bayes classifier for multivariate Bernoulli models.",
"url": "http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.BernoulliNB.html",
"schema": {
- "alpha": {
- "description": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).",
- "type": "float",
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [0.001, 0.01, 0.1, 1, 10, 100]
- }
- },
- "binarize": {
- "description": "Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.",
- "type": "float",
- "default": 0,
- "ui": {
- "style": "radio",
- "choices": [0, 0.25, 0.5, 0.75, 1]
- }
- },
- "fit_prior": {
- "description": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used.",
- "type": "bool",
- "default": "true",
- "ui": {
- "style": "radio",
- "choices": ["True", "False"],
- "values": ["true", "false"]
- }
+ "alpha": {
+ "description": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).",
+ "type": "float",
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.001, 0.01, 0.1, 1, 10, 100]
}
+ },
+ "binarize": {
+ "description": "Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.",
+ "type": "float",
+ "default": 0,
+ "ui": {
+ "style": "radio",
+ "choices": [0, 0.25, 0.5, 0.75, 1]
+ }
+ },
+ "fit_prior": {
+ "description": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used.",
+ "type": "bool",
+ "default": "true",
+ "ui": {
+ "style": "radio",
+ "choices": ["True", "False"],
+ "values": ["true", "false"]
+ }
+ }
},
"category": "classification"
-},
-{
+ },
+ {
"name": "GaussianNB",
"path": "sklearn.naive_bayes",
"categorical_encoding_strategy": "OneHotEncoder",
"description": "Gaussian Naive Bayes",
"url": "http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html",
- "schema": {},
+ "schema": {
+ "var_smoothing": {
+ "description": "Portion of the largest variance of all features that is added to variances for calculation stability.",
+ "type": "float",
+ "default": 1e-9,
+ "ui": {
+ "style": "radio",
+ "choices": [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
+ }
+ }
+ },
"category": "classification"
-},
-{
+ },
+ {
"name": "MultinomialNB",
"path": "sklearn.naive_bayes",
"categorical_encoding_strategy": "OneHotEncoder",
"description": "Naive Bayes classifier for multinomial models.",
"url": "http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.MultinomialNB.html",
"schema": {
- "alpha": {
- "description": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).",
- "type": "float",
- "default": 1.0,
- "ui": {
- "style": "radio",
- "choices": [0.001, 0.01, 0.1, 1.0, 10, 100]
- }
- },
- "fit_prior": {
- "description": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used.",
- "type": "bool",
- "default": "true",
- "ui": {
- "style": "radio",
- "choices": ["True", "False"],
- "values": ["true", "false"]
- }
+ "alpha": {
+ "description": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).",
+ "type": "float",
+ "default": 1.0,
+ "ui": {
+ "style": "radio",
+ "choices": [0.001, 0.01, 0.1, 1.0, 10, 100]
+ }
+ },
+ "fit_prior": {
+ "description": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used.",
+ "type": "bool",
+ "default": "true",
+ "ui": {
+ "style": "radio",
+ "choices": ["True", "False"],
+ "values": ["true", "false"]
}
+ }
},
"category": "classification"
-},
-{
+ },
+ {
"name": "DecisionTreeClassifier",
"path": "sklearn.tree",
"categorical_encoding_strategy": "OrdinalEncoder",
"description": "Classifier that assigns a class to a sample based on a chained series of yes/no queries about the sample's features.",
"url": "http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html",
"schema": {
- "criterion": {
- "description": "The function to measure the quality of a split. Supported criteria are âginiâ for the Gini impurity and âentropyâ for the information gain.",
- "type": "string",
- "default": "gini",
- "ui": {
- "style": "radio",
- "choices": ["Gini impurity", "Information gain"],
- "values": ["gini", "entropy"]
- }
- },
- "max_depth": {
- "description": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.",
- "type": ["int", "none"],
- "default": 3,
- "ui": {
- "style": "radio",
- "choices": [3, 5, 10]
- }
- },
- "min_samples_split": {
- "description": "The minimum number of samples required to split an internal node.",
- "type": ["int", "float"],
- "default": 2,
- "ui": {
- "style": "radio",
- "choices": [2, 5, 10, 20]
- }
- },
- "min_samples_leaf": {
- "description": "The minimum number of samples required to be at a leaf node.",
- "type": ["int", "float"],
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [1, 5, 10, 20]
- }
- },
- "min_weight_fraction_leaf": {
- "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.",
- "type": "float",
- "default": 0.0,
- "ui": {
- "style": "radio",
- "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45]
- }
- },
- "max_features": {
- "description": "The number of features to consider when looking for the best split.",
- "type": ["int", "float", "string", "none"],
- "default": "sqrt",
- "ui": {
- "style": "radio",
- "choices": ["Square root", "Log2", "None"],
- "values": ["sqrt", "log2", "None"]
- }
+ "criterion": {
+ "description": "The function to measure the quality of a split. Supported criteria are 'gini' for the Gini impurity and 'entropy' for the information gain.",
+ "type": "string",
+ "default": "gini",
+ "ui": {
+ "style": "radio",
+ "choices": ["Gini impurity", "Information gain"],
+ "values": ["gini", "entropy"]
+ }
+ },
+ "max_depth": {
+ "description": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.",
+ "type": ["int", "none"],
+ "default": 3,
+ "ui": {
+ "style": "radio",
+ "choices": [3, 5, 10]
+ }
+ },
+ "min_samples_split": {
+ "description": "The minimum number of samples required to split an internal node.",
+ "type": ["int", "float"],
+ "default": 2,
+ "ui": {
+ "style": "radio",
+ "choices": [2, 5, 10, 20]
}
+ },
+ "min_samples_leaf": {
+ "description": "The minimum number of samples required to be at a leaf node.",
+ "type": ["int", "float"],
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 5, 10, 20]
+ }
+ },
+ "min_weight_fraction_leaf": {
+ "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.",
+ "type": "float",
+ "default": 0.0,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45]
+ }
+ },
+ "max_features": {
+ "description": "The number of features to consider when looking for the best split.",
+ "type": ["int", "float", "string", "none"],
+ "default": "sqrt",
+ "ui": {
+ "style": "radio",
+ "choices": ["Square root", "Log2", "None"],
+ "values": ["sqrt", "log2", "None"]
+ }
+ }
},
"category": "classification"
-},
-{
+ },
+ {
"name": "ExtraTreesClassifier",
"path": "sklearn.ensemble",
"categorical_encoding_strategy": "OrdinalEncoder",
"description": "Extremely Randomized Trees",
"url": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html",
"schema": {
- "n_estimators": {
- "description": "The number of trees in the forest.",
- "type": "int",
- "default": 100,
- "ui": {
- "style": "radio",
- "choices": [100, 500],
- "grid_search": [100]
- }
- },
- "criterion": {
- "description": "The function to measure the quality of a split. Supported criteria are âginiâ for the Gini impurity and âentropyâ for the information gain.",
- "type": "string",
- "values": ["gini", "entropy"],
- "default": "gini",
- "ui": {
- "style": "radio",
- "choices": ["Gini impurity", "Information gain"],
- "values": ["gini", "entropy"]
- }
- },
- "max_features": {
- "description": "The number of features to consider when looking for the best split.",
- "type": ["int", "float", "string", "none"],
- "default": "sqrt",
- "ui": {
- "style": "radio",
- "choices": ["Square root", "Log2", "None"],
- "values": ["sqrt", "log2", "None"]
- }
- },
- "min_samples_split": {
- "description": "The minimum number of samples required to split an internal node.",
- "type": ["int", "float"],
- "default": 2,
- "ui": {
- "style": "radio",
- "choices": [2, 5, 10, 20],
- "grid_search": [10, 20]
- }
- },
- "min_samples_leaf": {
- "description": "The minimum number of samples required to be at a leaf node.",
- "type": ["int", "float"],
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [1, 5, 10, 20],
- "grid_search": [10, 20]
- }
- },
- "bootstrap": {
- "description": "Whether bootstrap samples are used when building trees.",
- "type": "bool",
- "default": "false",
- "ui": {
- "style": "radio",
- "choices": ["True", "False"],
- "values": ["true", "false"]
- }
+ "n_estimators": {
+ "description": "The number of trees in the forest.",
+ "type": "int",
+ "default": 100,
+ "ui": {
+ "style": "radio",
+ "choices": [100, 500],
+ "grid_search": [100]
}
+ },
+ "criterion": {
+ "description": "The function to measure the quality of a split. Supported criteria are 'gini' for the Gini impurity and 'entropy' for the information gain.",
+ "type": "string",
+ "values": ["gini", "entropy"],
+ "default": "gini",
+ "ui": {
+ "style": "radio",
+ "choices": ["Gini impurity", "Information gain"],
+ "values": ["gini", "entropy"]
+ }
+ },
+ "max_features": {
+ "description": "The number of features to consider when looking for the best split.",
+ "type": ["int", "float", "string", "none"],
+ "default": "sqrt",
+ "ui": {
+ "style": "radio",
+ "choices": ["Square root", "Log2", "None"],
+ "values": ["sqrt", "log2", "None"]
+ }
+ },
+ "min_samples_split": {
+ "description": "The minimum number of samples required to split an internal node.",
+ "type": ["int", "float"],
+ "default": 2,
+ "ui": {
+ "style": "radio",
+ "choices": [2, 5, 10, 20],
+ "grid_search": [10, 20]
+ }
+ },
+ "min_samples_leaf": {
+ "description": "The minimum number of samples required to be at a leaf node.",
+ "type": ["int", "float"],
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 5, 10, 20],
+ "grid_search": [10, 20]
+ }
+ },
+ "bootstrap": {
+ "description": "Whether bootstrap samples are used when building trees.",
+ "type": "bool",
+ "default": "false",
+ "ui": {
+ "style": "radio",
+ "choices": ["True", "False"],
+ "values": ["true", "false"]
+ }
+ }
},
"category": "classification"
-},
-{
+ },
+ {
"name": "GradientBoostingClassifier",
"path": "sklearn.ensemble",
"categorical_encoding_strategy": "OrdinalEncoder",
"description": "An ensemble of decision trees that are iteratively trained on the dataset for the optimization of arbitrary differentiable loss functions.",
"url": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html",
"schema": {
- "n_estimators": {
- "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.",
- "type": "int",
- "default": 100,
- "ui": {
- "style": "radio",
- "choices": [100, 500],
- "grid_search": [100]
- }
- },
- "learning_rate": {
- "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.",
- "type": "float",
- "default": 0.1,
- "ui": {
- "style": "radio",
- "choices": [0.01, 0.1, 1]
- }
- },
- "max_depth": {
- "description": "Maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables.",
- "type": ["int", "none"],
- "default": 3,
- "ui": {
- "style": "radio",
- "choices": [1, 3, 5, 10]
- }
- },
- "min_samples_split": {
- "description": "The minimum number of samples required to split an internal node.",
- "type": ["int", "float"],
- "default": 2,
- "ui": {
- "style": "radio",
- "choices": [2, 5, 10, 20],
- "grid_search": [10, 20]
- }
- },
- "min_samples_leaf": {
- "description": "The minimum number of samples required to be at a leaf node.",
- "type": ["int", "float"],
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [1, 5, 10, 20],
- "grid_search": [10, 20]
- }
- },
- "subsample": {
- "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.",
- "type": "float",
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [0.5, 1]
- }
- },
- "max_features": {
- "description": "The number of features to consider when looking for the best split.",
- "type": ["int", "float", "string", "none"],
- "default": "sqrt",
- "ui": {
- "style": "radio",
- "choices": ["Square root", "Log2"],
- "values": ["sqrt", "log2"]
- }
+ "n_estimators": {
+ "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.",
+ "type": "int",
+ "default": 100,
+ "ui": {
+ "style": "radio",
+ "choices": [100, 500],
+ "grid_search": [100]
+ }
+ },
+ "learning_rate": {
+ "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.",
+ "type": "float",
+ "default": 0.1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.01, 0.1, 1]
+ }
+ },
+ "max_depth": {
+ "description": "Maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables.",
+ "type": ["int", "none"],
+ "default": 3,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 3, 5, 10]
+ }
+ },
+ "min_samples_split": {
+ "description": "The minimum number of samples required to split an internal node.",
+ "type": ["int", "float"],
+ "default": 2,
+ "ui": {
+ "style": "radio",
+ "choices": [2, 5, 10, 20],
+ "grid_search": [10, 20]
+ }
+ },
+ "min_samples_leaf": {
+ "description": "The minimum number of samples required to be at a leaf node.",
+ "type": ["int", "float"],
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 5, 10, 20],
+ "grid_search": [10, 20]
+ }
+ },
+ "subsample": {
+ "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.",
+ "type": "float",
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.5, 1]
+ }
+ },
+ "max_features": {
+ "description": "The number of features to consider when looking for the best split.",
+ "type": ["int", "float", "string", "none"],
+ "default": "sqrt",
+ "ui": {
+ "style": "radio",
+ "choices": ["Square root", "Log2"],
+ "values": ["sqrt", "log2"]
}
+ }
},
"category": "classification"
-},
-{
+ },
+ {
"name": "XGBClassifier",
"path": "xgboost",
"categorical_encoding_strategy": "OrdinalEncoder",
- "description": "eXtreme Gradient Boosting classification",
+ "description": "eXtreme Gradient Boosting classifier for supervised learning tasks.",
"url": "https://xgboost.readthedocs.io/en/latest/tutorials/model.html",
+ "static_parameters": { "objective": "binary:logistic" },
"schema": {
- "n_estimators": {
- "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.",
- "type": "int",
- "default": 100,
- "ui": {
- "style": "radio",
- "choices": [100, 500],
- "grid_search": [100]
- }
- },
- "learning_rate": {
- "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.",
- "type": "float",
- "default": 0.1,
- "ui": {
- "style": "radio",
- "choices": [0.01, 0.1, 1]
- }
- },
- "max_depth": {
- "description": "Maximum tree depth for base learners.",
- "type": "int",
- "default": 3,
- "ui": {
- "style": "radio",
- "choices": [1, 3, 5, 10]
- }
- },
- "min_child_weight": {
- "description": "Minimum sum of instance weight(hessian) needed in a child.",
- "type": "int",
- "default": 3,
- "ui": {
- "style": "radio",
- "choices": [1, 3, 5, 10, 20]
- }
- },
- "subsample": {
- "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.",
- "type": "float",
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [0.5, 1]
- }
+ "n_estimators": {
+ "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting, so a large number usually results in better performance.",
+ "type": "int",
+ "default": 100,
+ "ui": {
+ "style": "radio",
+ "choices": [100, 500],
+ "grid_search": [100]
+ }
+ },
+ "learning_rate": {
+ "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.",
+ "type": "float",
+ "default": 0.1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.01, 0.1, 1]
+ }
+ },
+ "max_depth": {
+ "description": "Maximum tree depth for base learners.",
+ "type": "int",
+ "default": 3,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 3, 5, 10]
+ }
+ },
+ "min_child_weight": {
+ "description": "Minimum sum of instance weight (hessian) needed in a child.",
+ "type": "int",
+ "default": 3,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 3, 5, 10, 20]
}
+ },
+ "subsample": {
+ "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0, this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample < 1.0 leads to a reduction of variance and an increase in bias.",
+ "type": "float",
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.5, 1]
+ }
+ }
},
"category": "classification"
-},
-{
+ },
+ {
"name": "KNeighborsClassifier",
"path": "sklearn.neighbors",
"categorical_encoding_strategy": "OrdinalEncoder",
"description": "Nearest-neighbor classifier that classifies new data points based on the most common class among the k nearest data points.",
"url": "http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html",
"schema": {
- "n_neighbors": {
- "description": "Number of neighbors to use by default for k_neighbors queries.",
- "type": "int",
- "default": 5,
- "ui": {
- "style": "radio",
- "choices": [1, 3, 5, 7, 9, 11]
- }
- },
- "weights": {
- "description": "Weight function used in prediction.",
- "type": "string",
- "default": "uniform",
- "ui": {
- "style": "radio",
- "choices": ["Uniform", "Distance"],
- "values": ["uniform", "distance"]
- }
- },
- "p": {
- "description": "Power parameter for the Minkowski metric.",
- "type": "int",
- "default": 2,
- "ui": {
- "style": "radio",
- "choices": [1, 2]
- }
+ "n_neighbors": {
+ "description": "Number of neighbors to use by default for k_neighbors queries.",
+ "type": "int",
+ "default": 5,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 3, 5, 7, 9, 11]
}
+ },
+ "weights": {
+ "description": "Weight function used in prediction.",
+ "type": "string",
+ "default": "uniform",
+ "ui": {
+ "style": "radio",
+ "choices": ["Uniform", "Distance"],
+ "values": ["uniform", "distance"]
+ }
+ },
+ "p": {
+ "description": "Power parameter for the Minkowski metric.",
+ "type": "int",
+ "default": 2,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 2]
+ }
+ }
},
"category": "classification"
-},
-{
+ },
+ {
"name": "LinearSVC",
"path": "sklearn.svm",
"categorical_encoding_strategy": "OneHotEncoder",
"description": "Linear Support Vector Classification.",
"url": "http://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html",
- "invalidParameterCombinations" : [
- [{"penalty":"l2"}, {"loss":"hinge"}, {"dual":"false"}],
- [{"penalty":"l1"}, {"loss":"square_hinge"}, {"dual":"true"}],
- [{"penalty":"l1"}, {"loss":"hinge"}]
+ "invalidParameterCombinations": [
+ [{ "penalty": "l2" }, { "loss": "hinge" }, { "dual": "false" }],
+ [{ "penalty": "l1" }, { "loss": "squared_hinge" }, { "dual": "true" }],
+ [{ "penalty": "l1" }, { "loss": "hinge" }]
],
"schema": {
- "penalty": {
- "description": "Specifies the norm used in the penalization. The âl2â penalty is the standard used in SVC. The âl1â leads to coef_ vectors that are sparse.",
- "type": "string",
- "default": "l2",
- "ui": {
- "style": "radio",
- "choices": ["L1", "L2"],
- "values": ["l1", "l2"]
- }
- },
- "loss": {
- "description": "Specifies the loss function. âhingeâ is the standard SVM loss (used e.g. by the SVC class) while âsquared_hingeâ is the square of the hinge loss.",
- "type": "string",
- "default": "squared_hinge",
- "ui": {
- "style": "radio",
- "choices": ["Hinge", "Squared hinge"],
- "values": ["hinge", "squared_hinge"]
- }
- },
- "dual": {
- "description": "Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples \u003e n_features.",
- "type": "bool",
- "default": "true",
- "ui": {
- "style": "radio",
- "choices": ["True", "False"],
- "values": ["true", "false"]
- }
- },
- "tol": {
- "description": "Tolerance for stopping criteria.",
- "type": "float",
- "default": 0.0001,
- "ui": {
- "style": "radio",
- "choices": [1e-05, 0.0001, 0.001, 0.01, 0.1]
- }
- },
- "C": {
- "description": "Penalty parameter C of the error term.",
- "type": "float",
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
- }
+ "penalty": {
+ "description": "Specifies the norm used in the penalization. The 'l2' penalty is the standard used in SVC. The 'l1' leads to coef_ vectors that are sparse.",
+ "type": "string",
+ "default": "l2",
+ "ui": {
+ "style": "radio",
+ "choices": ["L1", "L2"],
+ "values": ["l1", "l2"]
+ }
+ },
+ "loss": {
+ "description": "Specifies the loss function. 'hinge' is the standard SVM loss (used e.g. by the SVC class) while 'squared_hinge' is the square of the hinge loss.",
+ "type": "string",
+ "default": "squared_hinge",
+ "ui": {
+ "style": "radio",
+ "choices": ["Hinge", "Squared hinge"],
+ "values": ["hinge", "squared_hinge"]
+ }
+ },
+ "dual": {
+ "description": "Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples \u003e n_features.",
+ "type": "bool",
+ "default": "true",
+ "ui": {
+ "style": "radio",
+ "choices": ["True", "False"],
+ "values": ["true", "false"]
}
+ },
+ "tol": {
+ "description": "Tolerance for stopping criteria.",
+ "type": "float",
+ "default": 0.0001,
+ "ui": {
+ "style": "radio",
+ "choices": [1e-5, 0.0001, 0.001, 0.01, 0.1]
+ }
+ },
+ "C": {
+ "description": "Penalty parameter C of the error term.",
+ "type": "float",
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
+ }
+ }
},
"category": "classification"
-},
-{
+ },
+ {
"name": "LogisticRegression",
"categorical_encoding_strategy": "OneHotEncoder",
"path": "sklearn.linear_model",
"description": "Basic logistic regression that makes predictions about the outcome based on a linear combination of the features.",
"url": "http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html",
- "invalidParameterCombinations" : [
- [{"penalty":"l1"}, {"dual":"true"}]
- ],
- "static_parameters" : {"solver": "liblinear", "multi_class": "auto"},
+ "invalidParameterCombinations": [[{ "penalty": "l1" }, { "dual": "true" }]],
+ "static_parameters": { "solver": "liblinear", "multi_class": "auto" },
"schema": {
- "penalty": {
- "description": "Used to specify the norm used in the penalization. The ânewton-cgâ, âsagâ and âlbfgsâ solvers support only l2 penalties.",
- "type": "string",
- "default": "l2",
- "ui": {
- "style": "radio",
- "choices": ["L1", "L2"],
- "values": ["l1", "l2"]
- }
- },
- "C": {
- "description": "Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization.",
- "type": "float",
- "default": 1.0,
- "ui": {
- "style": "radio",
- "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
- "grid_search": [0.0001, 0.01, 0.1, 0.5, 1, 10]
- }
- },
- "dual": {
- "description": "Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples \u003e n_features.",
- "type": "bool",
- "default": "false",
- "ui": {
- "style": "radio",
- "choices": ["True", "False"],
- "values": ["true", "false"]
- }
- },
- "fit_intercept": {
- "description": "Fit intercept in addition to feature coefficients.",
- "type": "bool",
- "default": "true",
- "ui": {
- "style": "radio",
- "choices": ["True", "False"],
- "values": ["true", "false"]
- }
+ "penalty": {
+ "description": "Used to specify the norm used in the penalization. The 'newton-cg', 'sag' and 'lbfgs' solvers support only l2 penalties.",
+ "type": "string",
+ "default": "l2",
+ "ui": {
+ "style": "radio",
+ "choices": ["L1", "L2"],
+ "values": ["l1", "l2"]
}
+ },
+ "C": {
+ "description": "Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization.",
+ "type": "float",
+ "default": 1.0,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
+ "grid_search": [0.0001, 0.01, 0.1, 0.5, 1, 10]
+ }
+ },
+ "dual": {
+ "description": "Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples \u003e n_features.",
+ "type": "bool",
+ "default": "false",
+ "ui": {
+ "style": "radio",
+ "choices": ["True", "False"],
+ "values": ["true", "false"]
+ }
+ },
+ "fit_intercept": {
+ "description": "Fit intercept in addition to feature coefficients.",
+ "type": "bool",
+ "default": "true",
+ "ui": {
+ "style": "radio",
+ "choices": ["True", "False"],
+ "values": ["true", "false"]
+ }
+ }
},
"category": "classification"
-},
-{
+ },
+ {
"name": "RandomForestClassifier",
"path": "sklearn.ensemble",
"categorical_encoding_strategy": "OrdinalEncoder",
"description": "An ensemble of decision trees that are trained on random sub-samples of the dataset.",
"url": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html",
"schema": {
- "n_estimators": {
- "description": "The number of trees in the forest.",
- "type": "int",
- "default": 100,
- "ui": {
- "style": "radio",
- "choices": [100, 500],
- "grid_search": [100]
- }
- },
- "criterion": {
- "description": "The function to measure the quality of a split. Supported criteria are âginiâ for the Gini impurity and âentropyâ for the information gain. Note: this parameter is tree-specific.",
- "type": "string",
- "default": "gini",
- "ui": {
- "style": "radio",
- "choices": ["Gini impurity", "Information gain"],
- "values": ["gini", "entropy"]
- }
- },
- "max_features": {
- "description": "The number of features to consider when looking for the best split.",
- "type": ["int", "float", "string", "none"],
- "default": "sqrt",
- "ui": {
- "style": "radio",
- "choices": ["Square root", "Log2"],
- "values": ["sqrt", "log2"]
- }
- },
- "min_samples_split": {
- "description": "The minimum number of samples required to split an internal node.",
- "type": ["int", "float"],
- "default": 2,
- "ui": {
- "style": "radio",
- "choices": [2, 5, 10, 20],
- "grid_search": [10, 20]
- }
- },
- "min_samples_leaf": {
- "description": "The minimum number of samples required to be at a leaf node.",
- "type": ["int", "float"],
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [1, 5, 10, 20],
- "grid_search": [10, 20]
- }
- },
- "bootstrap": {
- "description": "Whether bootstrap samples are used when building trees.",
- "type": "bool",
- "default": "true",
- "ui": {
- "style": "radio",
- "choices": ["True", "False"],
- "values": ["true", "false"]
- }
- },
- "min_weight_fraction_leaf": {
- "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.",
- "type": "float",
- "default": 0.0,
- "ui": {
- "style": "radio",
- "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45],
- "grid_search": [0.0, 0.1, 0.2, 0.3, 0.4]
- }
+ "n_estimators": {
+ "description": "The number of trees in the forest.",
+ "type": "int",
+ "default": 100,
+ "ui": {
+ "style": "radio",
+ "choices": [100, 500],
+ "grid_search": [100]
+ }
+ },
+ "criterion": {
+ "description": "The function to measure the quality of a split. Supported criteria are 'gini' for the Gini impurity and 'entropy' for the information gain. Note: this parameter is tree-specific.",
+ "type": "string",
+ "default": "gini",
+ "ui": {
+ "style": "radio",
+ "choices": ["Gini impurity", "Information gain"],
+ "values": ["gini", "entropy"]
+ }
+ },
+ "max_features": {
+ "description": "The number of features to consider when looking for the best split.",
+ "type": ["int", "float", "string", "none"],
+ "default": "sqrt",
+ "ui": {
+ "style": "radio",
+ "choices": ["Square root", "Log2"],
+ "values": ["sqrt", "log2"]
+ }
+ },
+ "min_samples_split": {
+ "description": "The minimum number of samples required to split an internal node.",
+ "type": ["int", "float"],
+ "default": 2,
+ "ui": {
+ "style": "radio",
+ "choices": [2, 5, 10, 20],
+ "grid_search": [10, 20]
+ }
+ },
+ "min_samples_leaf": {
+ "description": "The minimum number of samples required to be at a leaf node.",
+ "type": ["int", "float"],
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 5, 10, 20],
+ "grid_search": [10, 20]
+ }
+ },
+ "bootstrap": {
+ "description": "Whether bootstrap samples are used when building trees.",
+ "type": "bool",
+ "default": "true",
+ "ui": {
+ "style": "radio",
+ "choices": ["True", "False"],
+ "values": ["true", "false"]
+ }
+ },
+ "min_weight_fraction_leaf": {
+ "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.",
+ "type": "float",
+ "default": 0.0,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45],
+ "grid_search": [0.0, 0.1, 0.2, 0.3, 0.4]
}
+ }
},
"category": "classification"
-},
-{
+ },
+ {
"name": "SVC",
"path": "sklearn.svm",
"categorical_encoding_strategy": "OrdinalEncoder",
"description": "Kernel-based classifier that maps the data into a high-dimesional space then constructs a hyperplane that maximally separates the classes in that high-dimesional space.",
"url": "http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html",
- "static_parameters" : {"cache_size": 700, "max_iter": 10000, "probability": true},
+ "static_parameters": {
+ "cache_size": 700,
+ "max_iter": 10000,
+ "probability": true
+ },
+ "schema": {
+ "kernel": {
+ "description": "Specifies the kernel type to be used in the algorithm",
+ "type": "string",
+ "default": "rbf",
+ "ui": {
+ "style": "radio",
+ "choices": ["Polynomial", "Radial basis function"],
+ "values": ["poly", "rbf"]
+ }
+ },
+ "tol": {
+ "description": "Tolerance for stopping criteria.",
+ "type": "float",
+ "default": 0.0001,
+ "ui": {
+ "style": "radio",
+ "choices": [1e-5, 0.0001, 0.001, 0.01, 0.1]
+ }
+ },
+ "C": {
+ "description": "Penalty parameter C of the error term.",
+ "type": "float",
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
+ }
+ },
+ "gamma": {
+ "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.",
+ "type": "float",
+ "default": 0.01,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
+ }
+ },
+ "degree": {
+ "description": "Degree of the 'poly' kernel.",
+ "type": "int",
+ "default": 3,
+ "ui": {
+ "style": "radio",
+ "choices": [2, 3]
+ }
+ },
+ "coef0": {
+ "description": "Independent term in kernel function.",
+ "type": "float",
+ "default": 0.0,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0, 0.0001, 0.001, 0.01, 0.1, 1, 10]
+ }
+ }
+ },
+ "category": "classification"
+ },
+ {
+ "name": "MLPClassifier",
+ "path": "sklearn.neural_network",
+ "categorical_encoding_strategy": "OneHotEncoder",
+ "description": "Multi-layer Perceptron classifier.",
+ "url": "http://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html",
"schema": {
- "kernel": {
- "description": "Specifies the kernel type to be used in the algorithm",
- "type": "string",
- "default": "rbf",
- "ui": {
- "style": "radio",
- "choices": ["Polynomial", "Radial basis function"],
- "values": ["poly", "rbf"]
- }
- },
- "tol": {
- "description": "Tolerance for stopping criteria.",
- "type": "float",
- "default": 0.0001,
- "ui": {
- "style": "radio",
- "choices": [1e-05, 0.0001, 0.001, 0.01, 0.1]
- }
- },
- "C": {
- "description": "Penalty parameter C of the error term.",
- "type": "float",
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
- }
- },
- "gamma": {
- "description": "Kernel coefficient for ârbfâ, âpolyâ and âsigmoidâ.",
- "type": "float",
- "default": 0.01,
- "ui": {
- "style": "radio",
- "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
- }
- },
- "degree": {
- "description": "Degree of the 'poly' kernel.",
- "type": "int",
- "default": 3,
- "ui": {
- "style": "radio",
- "choices": [2, 3 ]
- }
- },
- "coef0": {
- "description": "Independent term in kernel function.",
- "type": "float",
- "default": 0.0 ,
- "ui": {
- "style": "radio",
- "choices": [0.0, 0.0001, 0.001, 0.01, 0.1, 1, 10]
- }
+ "activation": {
+ "description": "Activation function for the hidden layer.",
+ "type": "string",
+ "default": "relu",
+ "ui": {
+ "style": "radio",
+ "choices": ["Identity", "Logistic", "Tanh", "Relu"],
+ "values": ["identity", "logistic", "tanh", "relu"]
}
+ },
+ "solver": {
+ "description": "The solver for weight optimization.",
+ "type": "string",
+ "default": "adam",
+ "ui": {
+ "style": "radio",
+ "choices": ["Adam", "Lbfgs", "Sgd"],
+ "values": ["adam", "lbfgs", "sgd"]
+ }
+ },
+ "alpha": {
+ "description": "L2 penalty (regularization term) parameter.",
+ "type": "float",
+ "default": 0.0001,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10]
+ }
+ }
},
"category": "classification"
-},
-{
+ },
+ {
+ "name": "SGDClassifier",
+ "path": "sklearn.linear_model",
+ "categorical_encoding_strategy": "OneHotEncoder",
+ "description": "Linear classifiers (SVM, logistic regression, a.o.) with SGD training.",
+ "url": "http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html",
+ "schema": {
+ "loss": {
+ "description": "The loss function to be used. The possible values are 'hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron', or a regression loss: 'squared_loss', 'huber', 'epsilon_insensitive', or 'squared_epsilon_insensitive'.",
+ "type": "string",
+ "default": "hinge",
+ "ui": {
+ "style": "radio",
+ "choices": [
+ "Hinge",
+ "Log",
+ "Modified Huber",
+ "Squared Hinge",
+ "Perceptron",
+ "Squared Loss",
+ "Huber",
+ "Epsilon Insensitive",
+ "Squared Epsilon Insensitive"
+ ],
+ "values": [
+ "hinge",
+ "log",
+ "modified_huber",
+ "squared_hinge",
+ "perceptron",
+ "squared_loss",
+ "huber",
+ "epsilon_insensitive",
+ "squared_epsilon_insensitive"
+ ]
+ }
+ },
+ "learning_rate": {
+ "description": "The learning rate schedule. The possible values are 'constant', 'optimal', 'invscaling', 'adaptive'.",
+ "type": "string",
+ "default": "optimal",
+ "ui": {
+ "style": "radio",
+ "choices": ["Constant", "Optimal", "Invscaling", "Adaptive"],
+ "values": ["constant", "optimal", "invscaling", "adaptive"]
+ }
+ }
+ }
+ },
+ {
"name": "DecisionTreeRegressor",
"path": "sklearn.tree",
"categorical_encoding_strategy": "OrdinalEncoder",
"description": "A Decision Tree Regressor",
"url": "https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html",
"schema": {
- "criterion": {
- "description": "The function to measure the quality of a split. ",
- "type": "string",
- "default": "mse",
- "ui": {
- "style": "radio",
- "choices": ["Mean Squared Error", "Mean Absolute Error"],
- "values": ["mse", "mae"]
- }
- },
- "max_depth": {
- "description": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.",
- "type": ["int", "none"],
- "default": 3,
- "ui": {
- "style": "radio",
- "choices": [3, 5, 10]
- }
- },
- "min_samples_split": {
- "description": "The minimum number of samples required to split an internal node.",
- "type": ["int", "float"],
- "default": 2,
- "ui": {
- "style": "radio",
- "choices": [2, 5, 10, 20]
- }
- },
- "min_samples_leaf": {
- "description": "The minimum number of samples required to be at a leaf node.",
- "type": ["int", "float"],
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [1, 5, 10, 20]
- }
- },
- "min_weight_fraction_leaf": {
- "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.",
- "type": "float",
- "default": 0.0,
- "ui": {
- "style": "radio",
- "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45]
- }
- },
- "max_features": {
- "description": "The number of features to consider when looking for the best split.",
- "type": ["int", "float", "string", "none"],
- "default": "sqrt",
- "ui": {
- "style": "radio",
- "choices": ["Square root", "Log2", "None"],
- "values": ["sqrt", "log2", "None"]
- }
+ "criterion": {
+ "description": "The function to measure the quality of a split. ",
+ "type": "string",
+ "default": "mse",
+ "ui": {
+ "style": "radio",
+ "choices": ["Mean Squared Error", "Mean Absolute Error"],
+ "values": ["mse", "mae"]
+ }
+ },
+ "max_depth": {
+ "description": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.",
+ "type": ["int", "none"],
+ "default": 3,
+ "ui": {
+ "style": "radio",
+ "choices": [3, 5, 10]
+ }
+ },
+ "min_samples_split": {
+ "description": "The minimum number of samples required to split an internal node.",
+ "type": ["int", "float"],
+ "default": 2,
+ "ui": {
+ "style": "radio",
+ "choices": [2, 5, 10, 20]
+ }
+ },
+ "min_samples_leaf": {
+ "description": "The minimum number of samples required to be at a leaf node.",
+ "type": ["int", "float"],
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 5, 10, 20]
}
+ },
+ "min_weight_fraction_leaf": {
+ "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.",
+ "type": "float",
+ "default": 0.0,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45]
+ }
+ },
+ "max_features": {
+ "description": "The number of features to consider when looking for the best split.",
+ "type": ["int", "float", "string", "none"],
+ "default": "sqrt",
+ "ui": {
+ "style": "radio",
+ "choices": ["Square root", "Log2", "None"],
+ "values": ["sqrt", "log2", "None"]
+ }
+ }
},
"category": "regression"
-},
-{
+ },
+ {
"name": "RandomForestRegressor",
"path": "sklearn.ensemble",
"categorical_encoding_strategy": "OrdinalEncoder",
"description": "An ensemble of decision trees that are trained on random sub-samples of the dataset.",
"url": "https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html",
"schema": {
- "n_estimators": {
- "description": "The number of trees in the forest.",
- "type": "int",
- "default": 100,
- "ui": {
- "style": "radio",
- "choices": [100, 500],
- "grid_search": [100]
- }
- },
- "criterion": {
- "description": "The function to measure the quality of a split. ",
- "type": "string",
- "default": "mse",
- "ui": {
- "style": "radio",
- "choices": ["Mean Squared Error", "Mean Absolute Error"],
- "values": ["mse", "mae"]
- }
- },
- "max_features": {
- "description": "The number of features to consider when looking for the best split.",
- "type": ["int", "float", "string", "none"],
- "default": "sqrt",
- "ui": {
- "style": "radio",
- "choices": ["Square root", "Log2"],
- "values": ["sqrt", "log2"]
- }
- },
- "min_samples_split": {
- "description": "The minimum number of samples required to split an internal node.",
- "type": ["int", "float"],
- "default": 2,
- "ui": {
- "style": "radio",
- "choices": [2, 5, 10, 20],
- "grid_search": [10, 20]
- }
- },
- "min_samples_leaf": {
- "description": "The minimum number of samples required to be at a leaf node.",
- "type": ["int", "float"],
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [1, 5, 10, 20],
- "grid_search": [10, 20]
- }
- },
- "bootstrap": {
- "description": "Whether bootstrap samples are used when building trees.",
- "type": "bool",
- "default": "true",
- "ui": {
- "style": "radio",
- "choices": ["True", "False"],
- "values": ["true", "false"]
- }
- },
- "min_weight_fraction_leaf": {
- "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.",
- "type": "float",
- "default": 0.0,
- "ui": {
- "style": "radio",
- "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45],
- "grid_search": [0.0, 0.1, 0.2, 0.3, 0.4]
- }
+ "n_estimators": {
+ "description": "The number of trees in the forest.",
+ "type": "int",
+ "default": 100,
+ "ui": {
+ "style": "radio",
+ "choices": [100, 500],
+ "grid_search": [100]
+ }
+ },
+ "criterion": {
+ "description": "The function to measure the quality of a split. ",
+ "type": "string",
+ "default": "mse",
+ "ui": {
+ "style": "radio",
+ "choices": ["Mean Squared Error", "Mean Absolute Error"],
+ "values": ["mse", "mae"]
+ }
+ },
+ "max_features": {
+ "description": "The number of features to consider when looking for the best split.",
+ "type": ["int", "float", "string", "none"],
+ "default": "sqrt",
+ "ui": {
+ "style": "radio",
+ "choices": ["Square root", "Log2"],
+ "values": ["sqrt", "log2"]
+ }
+ },
+ "min_samples_split": {
+ "description": "The minimum number of samples required to split an internal node.",
+ "type": ["int", "float"],
+ "default": 2,
+ "ui": {
+ "style": "radio",
+ "choices": [2, 5, 10, 20],
+ "grid_search": [10, 20]
}
+ },
+ "min_samples_leaf": {
+ "description": "The minimum number of samples required to be at a leaf node.",
+ "type": ["int", "float"],
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 5, 10, 20],
+ "grid_search": [10, 20]
+ }
+ },
+ "bootstrap": {
+ "description": "Whether bootstrap samples are used when building trees.",
+ "type": "bool",
+ "default": "true",
+ "ui": {
+ "style": "radio",
+ "choices": ["True", "False"],
+ "values": ["true", "false"]
+ }
+ },
+ "min_weight_fraction_leaf": {
+ "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.",
+ "type": "float",
+ "default": 0.0,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45],
+ "grid_search": [0.0, 0.1, 0.2, 0.3, 0.4]
+ }
+ }
},
"category": "regression"
-},
-{
+ },
+ {
"name": "SVR",
"path": "sklearn.svm",
"categorical_encoding_strategy": "OrdinalEncoder",
"description": "Kernel-based regressor that maps the data into a high-dimesional space then constructs a hyperplane that maximally separates the classes in that high-dimesional space.",
"url": "http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html",
- "static_parameters" : {"cache_size": 700, "max_iter": 10000},
+ "static_parameters": { "cache_size": 700, "max_iter": 10000 },
"schema": {
- "kernel": {
- "description": "Specifies the kernel type to be used in the algorithm",
- "type": "string",
- "default": "rbf",
- "ui": {
- "style": "radio",
- "choices": ["Polynomial", "Radial basis function"],
- "values": ["poly", "rbf"]
- }
- },
- "tol": {
- "description": "Tolerance for stopping criteria.",
- "type": "float",
- "default": 0.0001,
- "ui": {
- "style": "radio",
- "choices": [1e-05, 0.0001, 0.001, 0.01, 0.1]
- }
- },
- "C": {
- "description": "Penalty parameter C of the error term.",
- "type": "float",
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
- }
- },
- "gamma": {
- "description": "Kernel coefficient for ârbfâ, âpolyâ and âsigmoidâ.",
- "type": "float",
- "default": 0.01,
- "ui": {
- "style": "radio",
- "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
- }
- },
- "degree": {
- "description": "Degree of the 'poly' kernel.",
- "type": "int",
- "default": 3,
- "ui": {
- "style": "radio",
- "choices": [2, 3 ]
- }
- },
- "coef0": {
- "description": "Independent term in kernel function.",
- "type": "float",
- "default": 0.0 ,
- "ui": {
- "style": "radio",
- "choices": [0.0, 0.0001, 0.001, 0.01, 0.1, 1, 10]
- }
+ "kernel": {
+ "description": "Specifies the kernel type to be used in the algorithm",
+ "type": "string",
+ "default": "rbf",
+ "ui": {
+ "style": "radio",
+ "choices": ["Polynomial", "Radial basis function"],
+ "values": ["poly", "rbf"]
+ }
+ },
+ "tol": {
+ "description": "Tolerance for stopping criteria.",
+ "type": "float",
+ "default": 0.0001,
+ "ui": {
+ "style": "radio",
+ "choices": [1e-5, 0.0001, 0.001, 0.01, 0.1]
+ }
+ },
+ "C": {
+ "description": "Penalty parameter C of the error term.",
+ "type": "float",
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
+ }
+ },
+ "gamma": {
+ "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.",
+ "type": "float",
+ "default": 0.01,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
}
+ },
+ "degree": {
+ "description": "Degree of the 'poly' kernel.",
+ "type": "int",
+ "default": 3,
+ "ui": {
+ "style": "radio",
+ "choices": [2, 3]
+ }
+ },
+ "coef0": {
+ "description": "Independent term in kernel function.",
+ "type": "float",
+ "default": 0.0,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0, 0.0001, 0.001, 0.01, 0.1, 1, 10]
+ }
+ }
},
"category": "regression"
-},
-{
+ },
+ {
"name": "KNeighborsRegressor",
"path": "sklearn.neighbors",
"categorical_encoding_strategy": "OrdinalEncoder",
"description": "Nearest-neighbor regressor that classifies new data points based on the most common class among the k nearest data points.",
"url": "https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html",
"schema": {
- "n_neighbors": {
- "description": "Number of neighbors to use by default for k_neighbors queries.",
- "type": "int",
- "default": 5,
- "ui": {
- "style": "radio",
- "choices": [1, 3, 5, 7, 9, 11]
- }
- },
- "weights": {
- "description": "Weight function used in prediction.",
- "type": "string",
- "default": "uniform",
- "ui": {
- "style": "radio",
- "choices": ["Uniform", "Distance"],
- "values": ["uniform", "distance"]
- }
- },
- "p": {
- "description": "Power parameter for the Minkowski metric.",
- "type": "int",
- "default": 2,
- "ui": {
- "style": "radio",
- "choices": [1, 2]
- }
+ "n_neighbors": {
+ "description": "Number of neighbors to use by default for k_neighbors queries.",
+ "type": "int",
+ "default": 5,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 3, 5, 7, 9, 11]
+ }
+ },
+ "weights": {
+ "description": "Weight function used in prediction.",
+ "type": "string",
+ "default": "uniform",
+ "ui": {
+ "style": "radio",
+ "choices": ["Uniform", "Distance"],
+ "values": ["uniform", "distance"]
}
+ },
+ "p": {
+ "description": "Power parameter for the Minkowski metric.",
+ "type": "int",
+ "default": 2,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 2]
+ }
+ }
},
"category": "regression"
-},
-{
+ },
+ {
"name": "GradientBoostingRegressor",
"path": "sklearn.ensemble",
"categorical_encoding_strategy": "OrdinalEncoder",
"description": "An ensemble of decision trees that are iteratively trained on the dataset for the optimization of arbitrary differentiable loss functions.",
"url": "https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html",
"schema": {
- "n_estimators": {
- "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.",
- "type": "int",
- "default": 100,
- "ui": {
- "style": "radio",
- "choices": [100, 500],
- "grid_search": [100]
- }
- },
- "learning_rate": {
- "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.",
- "type": "float",
- "default": 0.1,
- "ui": {
- "style": "radio",
- "choices": [0.01, 0.1, 1]
- }
- },
- "max_depth": {
- "description": "Maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables.",
- "type": ["int", "none"],
- "default": 3,
- "ui": {
- "style": "radio",
- "choices": [1, 3, 5, 10]
- }
- },
- "min_samples_split": {
- "description": "The minimum number of samples required to split an internal node.",
- "type": ["int", "float"],
- "default": 2,
- "ui": {
- "style": "radio",
- "choices": [2, 5, 10, 20],
- "grid_search": [10, 20]
- }
- },
- "min_samples_leaf": {
- "description": "The minimum number of samples required to be at a leaf node.",
- "type": ["int", "float"],
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [1, 5, 10, 20],
- "grid_search": [10, 20]
- }
- },
- "subsample": {
- "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.",
- "type": "float",
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [0.5, 1]
- }
- },
- "max_features": {
- "description": "The number of features to consider when looking for the best split.",
- "type": ["int", "float", "string", "none"],
- "default": "sqrt",
- "ui": {
- "style": "radio",
- "choices": ["Square root", "Log2"],
- "values": ["sqrt", "log2"]
- }
+ "n_estimators": {
+ "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.",
+ "type": "int",
+ "default": 100,
+ "ui": {
+ "style": "radio",
+ "choices": [100, 500],
+ "grid_search": [100]
}
+ },
+ "learning_rate": {
+ "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.",
+ "type": "float",
+ "default": 0.1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.01, 0.1, 1]
+ }
+ },
+ "max_depth": {
+ "description": "Maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables.",
+ "type": ["int", "none"],
+ "default": 3,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 3, 5, 10]
+ }
+ },
+ "min_samples_split": {
+ "description": "The minimum number of samples required to split an internal node.",
+ "type": ["int", "float"],
+ "default": 2,
+ "ui": {
+ "style": "radio",
+ "choices": [2, 5, 10, 20],
+ "grid_search": [10, 20]
+ }
+ },
+ "min_samples_leaf": {
+ "description": "The minimum number of samples required to be at a leaf node.",
+ "type": ["int", "float"],
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 5, 10, 20],
+ "grid_search": [10, 20]
+ }
+ },
+ "subsample": {
+ "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.",
+ "type": "float",
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.5, 1]
+ }
+ },
+ "max_features": {
+ "description": "The number of features to consider when looking for the best split.",
+ "type": ["int", "float", "string", "none"],
+ "default": "sqrt",
+ "ui": {
+ "style": "radio",
+ "choices": ["Square root", "Log2"],
+ "values": ["sqrt", "log2"]
+ }
+ }
},
"category": "regression"
-},
-{
+ },
+ {
"name": "LassoLarsCV",
"categorical_encoding_strategy": "OneHotEncoder",
"path": "sklearn.linear_model",
"description": "Cross-validated Lasso, using the LARS algorithm.",
"url": "https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLarsCV.html",
- "static_parameters" : {"max_iter": 10000},
+ "static_parameters": { "max_iter": 10000 },
"schema": {
- "fit_intercept": {
- "description": "Fit intercept in addition to feature coefficients.",
- "type": "bool",
- "default": "true",
- "ui": {
- "style": "radio",
- "choices": ["True", "False"],
- "values": ["true", "false"]
- }
- },
- "normalize": {
- "description": "This parameter is ignored when fit_intercept is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm.",
- "type": "bool",
- "default": "true",
- "ui": {
- "style": "radio",
- "choices": ["True", "False"],
- "values": ["true", "false"]
- }
+ "fit_intercept": {
+ "description": "Fit intercept in addition to feature coefficients.",
+ "type": "bool",
+ "default": "true",
+ "ui": {
+ "style": "radio",
+ "choices": ["True", "False"],
+ "values": ["true", "false"]
+ }
+ },
+ "normalize": {
+ "description": "This parameter is ignored when fit_intercept is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm.",
+ "type": "bool",
+ "default": "true",
+ "ui": {
+ "style": "radio",
+ "choices": ["True", "False"],
+ "values": ["true", "false"]
}
+ }
},
"category": "regression"
-},
-{
+ },
+ {
"name": "KernelRidge",
"categorical_encoding_strategy": "OneHotEncoder",
"path": "sklearn.kernel_ridge",
"description": "Kernel ridge regression.",
"url": "https://scikit-learn.org/stable/modules/generated/sklearn.kernel_ridge.KernelRidge.html",
- "static_parameters" : {"kernel": "rbf"},
+ "static_parameters": { "kernel": "rbf" },
"schema": {
- "alpha": {
- "description": "Small positive values of alpha improve the conditioning of the problem and reduce the variance of the estimates.",
- "type": "float",
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [0.001, 0.01, 0.1, 1]
- }
- },
- "gamma": {
- "description": "Kernel coefficient for ârbfâ, âpolyâ and âsigmoidâ.",
- "type": "float",
- "default": 0.01,
- "ui": {
- "style": "radio",
- "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
- }
+ "alpha": {
+ "description": "Small positive values of alpha improve the conditioning of the problem and reduce the variance of the estimates.",
+ "type": "float",
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.001, 0.01, 0.1, 1]
+ }
+ },
+ "gamma": {
+ "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.",
+ "type": "float",
+ "default": 0.01,
+ "ui": {
+ "style": "radio",
+ "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]
}
+ }
},
"category": "regression"
-},
-{
+ },
+ {
"name": "XGBRegressor",
"path": "xgboost",
"categorical_encoding_strategy": "OrdinalEncoder",
"description": "eXtreme Gradient Boosting classification",
"url": "https://xgboost.readthedocs.io/en/latest/tutorials/model.html",
- "static_parameters" : {"objective": "reg:squarederror"},
+ "static_parameters": { "objective": "reg:squarederror" },
"schema": {
"n_estimators": {
- "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.",
- "type": "int",
- "default": 100,
- "ui": {
- "style": "radio",
- "choices": [100, 500],
- "grid_search": [100]
- }
+ "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.",
+ "type": "int",
+ "default": 100,
+ "ui": {
+ "style": "radio",
+ "choices": [100, 500],
+ "grid_search": [100]
+ }
},
"learning_rate": {
- "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.",
- "type": "float",
- "default": 0.1,
- "ui": {
- "style": "radio",
- "choices": [0.01, 0.1, 1]
- }
+ "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.",
+ "type": "float",
+ "default": 0.1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.01, 0.1, 1]
+ }
},
"max_depth": {
- "description": "Maximum tree depth for base learners.",
- "type": "int",
- "default": 3,
- "ui": {
- "style": "radio",
- "choices": [1, 3, 5, 10]
- }
+ "description": "Maximum tree depth for base learners.",
+ "type": "int",
+ "default": 3,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 3, 5, 10]
+ }
},
"min_child_weight": {
- "description": "Minimum sum of instance weight(hessian) needed in a child.",
- "type": "int",
- "default": 3,
- "ui": {
- "style": "radio",
- "choices": [1, 3, 5, 10, 20]
- }
+ "description": "Minimum sum of instance weight(hessian) needed in a child.",
+ "type": "int",
+ "default": 3,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 3, 5, 10, 20]
+ }
},
"subsample": {
- "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.",
- "type": "float",
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [0.5, 1]
- }
+ "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.",
+ "type": "float",
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.5, 1]
+ }
}
},
"category": "regression"
-}
+ }
]
diff --git a/docker/dbmongo/files/users.json b/docker/dbmongo/files/users.json
index d7af08576..5004ebd64 100644
--- a/docker/dbmongo/files/users.json
+++ b/docker/dbmongo/files/users.json
@@ -1,56 +1,71 @@
[
-{
+ {
"username": "pennai",
"firstname": "Penn",
"lastname": "AI",
"apikey": "Oed+kIyprDrUq/3oWU5Jpyd22PqhG/CsUvI8oc9l39E=",
"roles": ["ai"],
- "algorithms": ["DecisionTreeClassifier",
- "GradientBoostingClassifier",
- "KNeighborsClassifier",
- "SVC",
- "LogisticRegression",
- "RandomForestClassifier",
- "DecisionTreeRegressor",
- "XGBRegressor",
- "SVR",
- "KNeighborsRegressor",
- "KernelRidge",
- "RandomForestRegressor"]
-},
-{
- "algorithms": ["DecisionTreeClassifier",
- "GradientBoostingClassifier",
- "KNeighborsClassifier",
- "SVC",
- "LogisticRegression",
- "RandomForestClassifier",
- "DecisionTreeRegressor",
- "XGBRegressor",
- "SVR",
- "KNeighborsRegressor",
- "KernelRidge",
- "RandomForestRegressor"],
+ "algorithms": [
+ "DecisionTreeClassifier",
+ "GradientBoostingClassifier",
+ "KNeighborsClassifier",
+ "SVC",
+ "LogisticRegression",
+ "RandomForestClassifier",
+ "GaussianNB",
+ "XGBClassifier",
+ "MLPClassifier",
+ "DecisionTreeRegressor",
+ "XGBRegressor",
+ "SVR",
+ "KNeighborsRegressor",
+ "KernelRidge",
+ "RandomForestRegressor"
+ ]
+ },
+ {
+ "algorithms": [
+ "DecisionTreeClassifier",
+ "GradientBoostingClassifier",
+ "KNeighborsClassifier",
+ "SVC",
+ "LogisticRegression",
+ "RandomForestClassifier",
+ "GaussianNB",
+ "XGBClassifier",
+ "MLPClassifier",
+ "DecisionTreeRegressor",
+ "XGBRegressor",
+ "SVR",
+ "KNeighborsRegressor",
+ "KernelRidge",
+ "RandomForestRegressor"
+ ],
"username": "pmlb",
"firstname": "Pmlb",
"lastname": "User"
-},
-{
+ },
+ {
"username": "testuser",
"firstname": "Test",
"lastname": "User",
"roles": ["admin", "beginner"],
- "algorithms": ["DecisionTreeClassifier",
- "GradientBoostingClassifier",
- "KNeighborsClassifier",
- "SVC",
- "LogisticRegression",
- "RandomForestClassifier",
- "DecisionTreeRegressor",
- "XGBRegressor",
- "SVR",
- "KNeighborsRegressor",
- "KernelRidge",
- "RandomForestRegressor"]
-}
+ "algorithms": [
+ "DecisionTreeClassifier",
+ "GradientBoostingClassifier",
+ "KNeighborsClassifier",
+ "SVC",
+ "LogisticRegression",
+ "RandomForestClassifier",
+ "GaussianNB",
+ "XGBClassifier",
+ "MLPClassifier",
+ "DecisionTreeRegressor",
+ "XGBRegressor",
+ "SVR",
+ "KNeighborsRegressor",
+ "KernelRidge",
+ "RandomForestRegressor"
+ ]
+ }
]
From 0a1a9558aeaea5cde84b37304766f10f6b86457a Mon Sep 17 00:00:00 2001
From: choi
Date: Fri, 5 Apr 2024 16:13:08 -0700
Subject: [PATCH 10/15] Remove GaussianNB, XGBClassifier, MLPClassifier from projects.json
---
.gitignore | 4 +-
docker/dbmongo/files/projects.json | 118 +----------------------------
docker/dbmongo/files/users.json | 3 -
machine/learn/driver.py | 1 -
4 files changed, 6 insertions(+), 120 deletions(-)
diff --git a/.gitignore b/.gitignore
index 5e06cf6b2..bf3aea4c8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -67,4 +67,6 @@ package-lock.json
package.json
package-copy.json
machine/code_runs/
-machine/test_trained_models/
\ No newline at end of file
+machine/test_trained_models/
+projects.sample.json
+users.sample.json
diff --git a/docker/dbmongo/files/projects.json b/docker/dbmongo/files/projects.json
index 469f8b857..eb9804c5b 100644
--- a/docker/dbmongo/files/projects.json
+++ b/docker/dbmongo/files/projects.json
@@ -37,25 +37,7 @@
},
"category": "classification"
},
- {
- "name": "GaussianNB",
- "path": "sklearn.naive_bayes",
- "categorical_encoding_strategy": "OneHotEncoder",
- "description": "Gaussian Naive Bayes",
- "url": "http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html",
- "schema": {
- "var_smoothing": {
- "description": "Portion of the largest variance of all features that is added to variances for calculation stability.",
- "type": "float",
- "default": 1e-9,
- "ui": {
- "style": "radio",
- "choices": [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
- }
- }
- },
- "category": "classification"
- },
+
{
"name": "MultinomialNB",
"path": "sklearn.naive_bayes",
@@ -299,63 +281,7 @@
},
"category": "classification"
},
- {
- "name": "XGBClassifier",
- "path": "xgboost",
- "categorical_encoding_strategy": "OrdinalEncoder",
- "description": "eXtreme Gradient Boosting classifier for supervised learning tasks.",
- "url": "https://xgboost.readthedocs.io/en/latest/tutorials/model.html",
- "static_parameters": { "objective": "binary:logistic" },
- "schema": {
- "n_estimators": {
- "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting, so a large number usually results in better performance.",
- "type": "int",
- "default": 100,
- "ui": {
- "style": "radio",
- "choices": [100, 500],
- "grid_search": [100]
- }
- },
- "learning_rate": {
- "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.",
- "type": "float",
- "default": 0.1,
- "ui": {
- "style": "radio",
- "choices": [0.01, 0.1, 1]
- }
- },
- "max_depth": {
- "description": "Maximum tree depth for base learners.",
- "type": "int",
- "default": 3,
- "ui": {
- "style": "radio",
- "choices": [1, 3, 5, 10]
- }
- },
- "min_child_weight": {
- "description": "Minimum sum of instance weight (hessian) needed in a child.",
- "type": "int",
- "default": 3,
- "ui": {
- "style": "radio",
- "choices": [1, 3, 5, 10, 20]
- }
- },
- "subsample": {
- "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0, this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample < 1.0 leads to a reduction of variance and an increase in bias.",
- "type": "float",
- "default": 1,
- "ui": {
- "style": "radio",
- "choices": [0.5, 1]
- }
- }
- },
- "category": "classification"
- },
+
{
"name": "KNeighborsClassifier",
"path": "sklearn.neighbors",
@@ -659,45 +585,7 @@
},
"category": "classification"
},
- {
- "name": "MLPClassifier",
- "path": "sklearn.neural_network",
- "categorical_encoding_strategy": "OneHotEncoder",
- "description": "Multi-layer Perceptron classifier.",
- "url": "http://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html",
- "schema": {
- "activation": {
- "description": "Activation function for the hidden layer.",
- "type": "string",
- "default": "relu",
- "ui": {
- "style": "radio",
- "choices": ["Identity", "Logistic", "Tanh", "Relu"],
- "values": ["identity", "logistic", "tanh", "relu"]
- }
- },
- "solver": {
- "description": "The solver for weight optimization.",
- "type": "string",
- "default": "adam",
- "ui": {
- "style": "radio",
- "choices": ["Adam", "Lbfgs", "Sgd"],
- "values": ["adam", "lbfgs", "sgd"]
- }
- },
- "alpha": {
- "description": "L2 penalty (regularization term) parameter.",
- "type": "float",
- "default": 0.0001,
- "ui": {
- "style": "radio",
- "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10]
- }
- }
- },
- "category": "classification"
- },
+
{
"name": "SGDClassifier",
"path": "sklearn.linear_model",
diff --git a/docker/dbmongo/files/users.json b/docker/dbmongo/files/users.json
index 5004ebd64..e95ed4e94 100644
--- a/docker/dbmongo/files/users.json
+++ b/docker/dbmongo/files/users.json
@@ -12,9 +12,6 @@
"SVC",
"LogisticRegression",
"RandomForestClassifier",
- "GaussianNB",
- "XGBClassifier",
- "MLPClassifier",
"DecisionTreeRegressor",
"XGBRegressor",
"SVR",
diff --git a/machine/learn/driver.py b/machine/learn/driver.py
index 59f8980d1..89923fa13 100644
--- a/machine/learn/driver.py
+++ b/machine/learn/driver.py
@@ -71,7 +71,6 @@ def main(args, param_grid={}):
print("param_grid_gene")
print(param_grid)
- # svd ěźë param grid ě°¨ěë?
generate_results(model=model,
From b4bf1fe10b81cc06e607084e06292da4fb2da860 Mon Sep 17 00:00:00 2001
From: choi
Date: Fri, 5 Apr 2024 16:40:56 -0700
Subject: [PATCH 11/15] Remove GaussianNB, XGBClassifier, MLPClassifier from remaining users
---
docker/dbmongo/files/users.json | 6 ------
1 file changed, 6 deletions(-)
diff --git a/docker/dbmongo/files/users.json b/docker/dbmongo/files/users.json
index e95ed4e94..67c2b25f7 100644
--- a/docker/dbmongo/files/users.json
+++ b/docker/dbmongo/files/users.json
@@ -28,9 +28,6 @@
"SVC",
"LogisticRegression",
"RandomForestClassifier",
- "GaussianNB",
- "XGBClassifier",
- "MLPClassifier",
"DecisionTreeRegressor",
"XGBRegressor",
"SVR",
@@ -54,9 +51,6 @@
"SVC",
"LogisticRegression",
"RandomForestClassifier",
- "GaussianNB",
- "XGBClassifier",
- "MLPClassifier",
"DecisionTreeRegressor",
"XGBRegressor",
"SVR",
From 24a3eba2ca204ecf95b994c36dc60a93ce65941c Mon Sep 17 00:00:00 2001
From: choi
Date: Fri, 5 Apr 2024 16:43:36 -0700
Subject: [PATCH 12/15] Remove commented-out *.mp4 rule from .gitignore
---
.gitignore | 1 -
1 file changed, 1 deletion(-)
diff --git a/.gitignore b/.gitignore
index bf3aea4c8..411dc4560 100644
--- a/.gitignore
+++ b/.gitignore
@@ -62,7 +62,6 @@ MANIFEST
.xz
*.exe
-# *.mp4
package-lock.json
package.json
package-copy.json
From 9930f90dec7c49cad251b198be3e6ed35b567c82 Mon Sep 17 00:00:00 2001
From: choi
Date: Fri, 5 Apr 2024 17:09:13 -0700
Subject: [PATCH 13/15] remove sgdclassifier
---
docker/dbmongo/files/projects.json | 49 ------------------------------
1 file changed, 49 deletions(-)
diff --git a/docker/dbmongo/files/projects.json b/docker/dbmongo/files/projects.json
index eb9804c5b..17b0424d9 100644
--- a/docker/dbmongo/files/projects.json
+++ b/docker/dbmongo/files/projects.json
@@ -586,55 +586,6 @@
"category": "classification"
},
- {
- "name": "SGDClassifier",
- "path": "sklearn.linear_model",
- "categorical_encoding_strategy": "OneHotEncoder",
- "description": "Linear classifiers (SVM, logistic regression, a.o.) with SGD training.",
- "url": "http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html",
- "schema": {
- "loss": {
- "description": "The loss function to be used. The possible values are ‘hinge’, ‘log’, ‘modified_huber’, ‘squared_hinge’, ‘perceptron’, or a regression loss: ‘squared_loss’, ‘huber’, ‘epsilon_insensitive’, or ‘squared_epsilon_insensitive’.",
- "type": "string",
- "default": "hinge",
- "ui": {
- "style": "radio",
- "choices": [
- "Hinge",
- "Log",
- "Modified Huber",
- "Squared Hinge",
- "Perceptron",
- "Squared Loss",
- "Huber",
- "Epsilon Insensitive",
- "Squared Epsilon Insensitive"
- ],
- "values": [
- "hinge",
- "log",
- "modified_huber",
- "squared_hinge",
- "perceptron",
- "squared_loss",
- "huber",
- "epsilon_insensitive",
- "squared_epsilon_insensitive"
- ]
- }
- },
- "learning_rate": {
- "description": "The learning rate schedule. The possible values are ‘constant’, ‘optimal’, ‘invscaling’, ‘adaptive’.",
- "type": "string",
- "default": "optimal",
- "ui": {
- "style": "radio",
- "choices": ["Constant", "Optimal", "Invscaling", "Adaptive"],
- "values": ["constant", "optimal", "invscaling", "adaptive"]
- }
- }
- }
- },
{
"name": "DecisionTreeRegressor",
"path": "sklearn.tree",
From 55912f7d1aa54fa672aee18b5b15a51ace5a6e10 Mon Sep 17 00:00:00 2001
From: choi
Date: Fri, 5 Apr 2024 17:35:28 -0700
Subject: [PATCH 14/15] update algorithm in projects.json
---
docker/dbmongo/files/projects.json | 68 ++++++++++++++++++++++++++++--
1 file changed, 65 insertions(+), 3 deletions(-)
diff --git a/docker/dbmongo/files/projects.json b/docker/dbmongo/files/projects.json
index 17b0424d9..89e436aa6 100644
--- a/docker/dbmongo/files/projects.json
+++ b/docker/dbmongo/files/projects.json
@@ -37,7 +37,15 @@
},
"category": "classification"
},
-
+ {
+ "name": "GaussianNB",
+ "path": "sklearn.naive_bayes",
+ "categorical_encoding_strategy": "OneHotEncoder",
+ "description": "Gaussian Naive Bayes",
+ "url": "http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html",
+ "schema": {},
+ "category": "classification"
+ },
{
"name": "MultinomialNB",
"path": "sklearn.naive_bayes",
@@ -281,7 +289,62 @@
},
"category": "classification"
},
-
+ {
+ "name": "XGBClassifier",
+ "path": "xgboost",
+ "categorical_encoding_strategy": "OrdinalEncoder",
+ "description": "eXtreme Gradient Boosting classification",
+ "url": "https://xgboost.readthedocs.io/en/latest/tutorials/model.html",
+ "schema": {
+ "n_estimators": {
+ "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.",
+ "type": "int",
+ "default": 100,
+ "ui": {
+ "style": "radio",
+ "choices": [100, 500],
+ "grid_search": [100]
+ }
+ },
+ "learning_rate": {
+ "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.",
+ "type": "float",
+ "default": 0.1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.01, 0.1, 1]
+ }
+ },
+ "max_depth": {
+ "description": "Maximum tree depth for base learners.",
+ "type": "int",
+ "default": 3,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 3, 5, 10]
+ }
+ },
+ "min_child_weight": {
+ "description": "Minimum sum of instance weight(hessian) needed in a child.",
+ "type": "int",
+ "default": 3,
+ "ui": {
+ "style": "radio",
+ "choices": [1, 3, 5, 10, 20]
+ }
+ },
+ "subsample": {
+ "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.",
+ "type": "float",
+ "default": 1,
+ "ui": {
+ "style": "radio",
+ "choices": [0.5, 1]
+ }
+ }
+ },
+ "category": "classification"
+ },
{
"name": "KNeighborsClassifier",
"path": "sklearn.neighbors",
@@ -585,7 +648,6 @@
},
"category": "classification"
},
-
{
"name": "DecisionTreeRegressor",
"path": "sklearn.tree",
From fbf6aac4d695ebfe3da9490e9b61cb3b1728bae6 Mon Sep 17 00:00:00 2001
From: choi
Date: Fri, 5 Apr 2024 18:30:04 -0700
Subject: [PATCH 15/15] update gitignore
---
.gitignore | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/.gitignore b/.gitignore
index a6a17b7bd..142adb3ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -62,8 +62,10 @@ MANIFEST
.xz
*.exe
-# *.mp4
package-lock.json
package.json
package-copy.json
-machine/code_runs/
\ No newline at end of file
+machine/code_runs/
+machine/test_trained_models/
+projects.sample.json
+users.sample.json
\ No newline at end of file