diff --git a/.gitignore b/.gitignore index a6a17b7bd..411dc4560 100644 --- a/.gitignore +++ b/.gitignore @@ -62,8 +62,10 @@ MANIFEST .xz *.exe -# *.mp4 package-lock.json package.json package-copy.json -machine/code_runs/ \ No newline at end of file +machine/code_runs/ +machine/test_trained_models/ +projects.sample.json +users.sample.json diff --git a/docker/dbmongo/files/projects.json b/docker/dbmongo/files/projects.json index fbf610bb7..89e436aa6 100644 --- a/docker/dbmongo/files/projects.json +++ b/docker/dbmongo/files/projects.json @@ -1,42 +1,43 @@ -[{ +[ + { "name": "BernoulliNB", "path": "sklearn.naive_bayes", "categorical_encoding_strategy": "OneHotEncoder", "description": "Naive Bayes classifier for multivariate Bernoulli models.", "url": "http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.BernoulliNB.html", "schema": { - "alpha": { - "description": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).", - "type": "float", - "default": 1, - "ui": { - "style": "radio", - "choices": [0.001, 0.01, 0.1, 1, 10, 100] - } - }, - "binarize": { - "description": "Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.", - "type": "float", - "default": 0, - "ui": { - "style": "radio", - "choices": [0, 0.25, 0.5, 0.75, 1] - } - }, - "fit_prior": { - "description": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used.", - "type": "bool", - "default": "true", - "ui": { - "style": "radio", - "choices": ["True", "False"], - "values": ["true", "false"] - } + "alpha": { + "description": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).", + "type": "float", + "default": 1, + "ui": { + "style": "radio", + "choices": [0.001, 0.01, 0.1, 1, 10, 100] } + }, + "binarize": { + "description": "Threshold for binarizing (mapping to booleans) of sample features. If None, input is presumed to already consist of binary vectors.", + "type": "float", + "default": 0, + "ui": { + "style": "radio", + "choices": [0, 0.25, 0.5, 0.75, 1] + } + }, + "fit_prior": { + "description": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used.", + "type": "bool", + "default": "true", + "ui": { + "style": "radio", + "choices": ["True", "False"], + "values": ["true", "false"] + } + } }, "category": "classification" -}, -{ + }, + { "name": "GaussianNB", "path": "sklearn.naive_bayes", "categorical_encoding_strategy": "OneHotEncoder", @@ -44,1049 +45,1051 @@ "url": "http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html", "schema": {}, "category": "classification" -}, -{ + }, + { "name": "MultinomialNB", "path": "sklearn.naive_bayes", "categorical_encoding_strategy": "OneHotEncoder", "description": "Naive Bayes classifier for multinomial models.", "url": "http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.MultinomialNB.html", "schema": { - "alpha": { - "description": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).", - "type": "float", - "default": 1.0, - "ui": { - "style": "radio", - "choices": [0.001, 0.01, 0.1, 1.0, 10, 100] - } - }, - "fit_prior": { - "description": "Whether to learn class prior probabilities or not. 
If false, a uniform prior will be used.", - "type": "bool", - "default": "true", - "ui": { - "style": "radio", - "choices": ["True", "False"], - "values": ["true", "false"] - } + "alpha": { + "description": "Additive (Laplace/Lidstone) smoothing parameter (0 for no smoothing).", + "type": "float", + "default": 1.0, + "ui": { + "style": "radio", + "choices": [0.001, 0.01, 0.1, 1.0, 10, 100] + } + }, + "fit_prior": { + "description": "Whether to learn class prior probabilities or not. If false, a uniform prior will be used.", + "type": "bool", + "default": "true", + "ui": { + "style": "radio", + "choices": ["True", "False"], + "values": ["true", "false"] } + } }, "category": "classification" -}, -{ + }, + { "name": "DecisionTreeClassifier", "path": "sklearn.tree", "categorical_encoding_strategy": "OrdinalEncoder", "description": "Classifier that assigns a class to a sample based on a chained series of yes/no queries about the sample's features.", "url": "http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html", "schema": { - "criterion": { - "description": "The function to measure the quality of a split. Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain.", - "type": "string", - "default": "gini", - "ui": { - "style": "radio", - "choices": ["Gini impurity", "Information gain"], - "values": ["gini", "entropy"] - } - }, - "max_depth": { - "description": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.", - "type": ["int", "none"], - "default": 3, - "ui": { - "style": "radio", - "choices": [3, 5, 10] - } - }, - "min_samples_split": { - "description": "The minimum number of samples required to split an internal node.", - "type": ["int", "float"], - "default": 2, - "ui": { - "style": "radio", - "choices": [2, 5, 10, 20] - } - }, - "min_samples_leaf": { - "description": "The minimum number of samples required to be at a leaf node.", - "type": ["int", "float"], - "default": 1, - "ui": { - "style": "radio", - "choices": [1, 5, 10, 20] - } - }, - "min_weight_fraction_leaf": { - "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.", - "type": "float", - "default": 0.0, - "ui": { - "style": "radio", - "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45] - } - }, - "max_features": { - "description": "The number of features to consider when looking for the best split.", - "type": ["int", "float", "string", "none"], - "default": "sqrt", - "ui": { - "style": "radio", - "choices": ["Square root", "Log2", "None"], - "values": ["sqrt", "log2", "None"] - } + "criterion": { + "description": "The function to measure the quality of a split. Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain.", + "type": "string", + "default": "gini", + "ui": { + "style": "radio", + "choices": ["Gini impurity", "Information gain"], + "values": ["gini", "entropy"] + } + }, + "max_depth": { + "description": "The maximum depth of the tree. 
If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.", + "type": ["int", "none"], + "default": 3, + "ui": { + "style": "radio", + "choices": [3, 5, 10] } + }, + "min_samples_split": { + "description": "The minimum number of samples required to split an internal node.", + "type": ["int", "float"], + "default": 2, + "ui": { + "style": "radio", + "choices": [2, 5, 10, 20] + } + }, + "min_samples_leaf": { + "description": "The minimum number of samples required to be at a leaf node.", + "type": ["int", "float"], + "default": 1, + "ui": { + "style": "radio", + "choices": [1, 5, 10, 20] + } + }, + "min_weight_fraction_leaf": { + "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.", + "type": "float", + "default": 0.0, + "ui": { + "style": "radio", + "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45] + } + }, + "max_features": { + "description": "The number of features to consider when looking for the best split.", + "type": ["int", "float", "string", "none"], + "default": "sqrt", + "ui": { + "style": "radio", + "choices": ["Square root", "Log2", "None"], + "values": ["sqrt", "log2", "None"] + } + } }, "category": "classification" -}, -{ + }, + { "name": "ExtraTreesClassifier", "path": "sklearn.ensemble", "categorical_encoding_strategy": "OrdinalEncoder", "description": "Extremely Randomized Trees", "url": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html", "schema": { - "n_estimators": { - "description": "The number of trees in the forest.", - "type": "int", - "default": 100, - "ui": { - "style": "radio", - "choices": [100, 500], - "grid_search": [100] - } - }, - "criterion": { - "description": "The function to measure the quality of a split. Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain.", - "type": "string", - "values": ["gini", "entropy"], - "default": "gini", - "ui": { - "style": "radio", - "choices": ["Gini impurity", "Information gain"], - "values": ["gini", "entropy"] - } - }, - "max_features": { - "description": "The number of features to consider when looking for the best split.", - "type": ["int", "float", "string", "none"], - "default": "sqrt", - "ui": { - "style": "radio", - "choices": ["Square root", "Log2", "None"], - "values": ["sqrt", "log2", "None"] - } - }, - "min_samples_split": { - "description": "The minimum number of samples required to split an internal node.", - "type": ["int", "float"], - "default": 2, - "ui": { - "style": "radio", - "choices": [2, 5, 10, 20], - "grid_search": [10, 20] - } - }, - "min_samples_leaf": { - "description": "The minimum number of samples required to be at a leaf node.", - "type": ["int", "float"], - "default": 1, - "ui": { - "style": "radio", - "choices": [1, 5, 10, 20], - "grid_search": [10, 20] - } - }, - "bootstrap": { - "description": "Whether bootstrap samples are used when building trees.", - "type": "bool", - "default": "false", - "ui": { - "style": "radio", - "choices": ["True", "False"], - "values": ["true", "false"] - } + "n_estimators": { + "description": "The number of trees in the forest.", + "type": "int", + "default": 100, + "ui": { + "style": "radio", + "choices": [100, 500], + "grid_search": [100] + } + }, + "criterion": { + "description": "The function to measure the quality of a split. 
Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain.", + "type": "string", + "values": ["gini", "entropy"], + "default": "gini", + "ui": { + "style": "radio", + "choices": ["Gini impurity", "Information gain"], + "values": ["gini", "entropy"] } + }, + "max_features": { + "description": "The number of features to consider when looking for the best split.", + "type": ["int", "float", "string", "none"], + "default": "sqrt", + "ui": { + "style": "radio", + "choices": ["Square root", "Log2", "None"], + "values": ["sqrt", "log2", "None"] + } + }, + "min_samples_split": { + "description": "The minimum number of samples required to split an internal node.", + "type": ["int", "float"], + "default": 2, + "ui": { + "style": "radio", + "choices": [2, 5, 10, 20], + "grid_search": [10, 20] + } + }, + "min_samples_leaf": { + "description": "The minimum number of samples required to be at a leaf node.", + "type": ["int", "float"], + "default": 1, + "ui": { + "style": "radio", + "choices": [1, 5, 10, 20], + "grid_search": [10, 20] + } + }, + "bootstrap": { + "description": "Whether bootstrap samples are used when building trees.", + "type": "bool", + "default": "false", + "ui": { + "style": "radio", + "choices": ["True", "False"], + "values": ["true", "false"] + } + } }, "category": "classification" -}, -{ + }, + { "name": "GradientBoostingClassifier", "path": "sklearn.ensemble", "categorical_encoding_strategy": "OrdinalEncoder", "description": "An ensemble of decision trees that are iteratively trained on the dataset for the optimization of arbitrary differentiable loss functions.", "url": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html", "schema": { - "n_estimators": { - "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.", - "type": "int", - "default": 100, - "ui": { - "style": "radio", - "choices": [100, 500], - "grid_search": [100] - } - }, - "learning_rate": { - "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.", - "type": "float", - "default": 0.1, - "ui": { - "style": "radio", - "choices": [0.01, 0.1, 1] - } - }, - "max_depth": { - "description": "Maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables.", - "type": ["int", "none"], - "default": 3, - "ui": { - "style": "radio", - "choices": [1, 3, 5, 10] - } - }, - "min_samples_split": { - "description": "The minimum number of samples required to split an internal node.", - "type": ["int", "float"], - "default": 2, - "ui": { - "style": "radio", - "choices": [2, 5, 10, 20], - "grid_search": [10, 20] - } - }, - "min_samples_leaf": { - "description": "The minimum number of samples required to be at a leaf node.", - "type": ["int", "float"], - "default": 1, - "ui": { - "style": "radio", - "choices": [1, 5, 10, 20], - "grid_search": [10, 20] - } - }, - "subsample": { - "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. 
Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.", - "type": "float", - "default": 1, - "ui": { - "style": "radio", - "choices": [0.5, 1] - } - }, - "max_features": { - "description": "The number of features to consider when looking for the best split.", - "type": ["int", "float", "string", "none"], - "default": "sqrt", - "ui": { - "style": "radio", - "choices": ["Square root", "Log2"], - "values": ["sqrt", "log2"] - } + "n_estimators": { + "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.", + "type": "int", + "default": 100, + "ui": { + "style": "radio", + "choices": [100, 500], + "grid_search": [100] + } + }, + "learning_rate": { + "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.", + "type": "float", + "default": 0.1, + "ui": { + "style": "radio", + "choices": [0.01, 0.1, 1] + } + }, + "max_depth": { + "description": "Maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables.", + "type": ["int", "none"], + "default": 3, + "ui": { + "style": "radio", + "choices": [1, 3, 5, 10] + } + }, + "min_samples_split": { + "description": "The minimum number of samples required to split an internal node.", + "type": ["int", "float"], + "default": 2, + "ui": { + "style": "radio", + "choices": [2, 5, 10, 20], + "grid_search": [10, 20] + } + }, + "min_samples_leaf": { + "description": "The minimum number of samples required to be at a leaf node.", + "type": ["int", "float"], + "default": 1, + "ui": { + "style": "radio", + "choices": [1, 5, 10, 20], + "grid_search": [10, 20] + } + }, + "subsample": { + "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.", + "type": "float", + "default": 1, + "ui": { + "style": "radio", + "choices": [0.5, 1] } + }, + "max_features": { + "description": "The number of features to consider when looking for the best split.", + "type": ["int", "float", "string", "none"], + "default": "sqrt", + "ui": { + "style": "radio", + "choices": ["Square root", "Log2"], + "values": ["sqrt", "log2"] + } + } }, "category": "classification" -}, -{ + }, + { "name": "XGBClassifier", "path": "xgboost", "categorical_encoding_strategy": "OrdinalEncoder", "description": "eXtreme Gradient Boosting classification", "url": "https://xgboost.readthedocs.io/en/latest/tutorials/model.html", "schema": { - "n_estimators": { - "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.", - "type": "int", - "default": 100, - "ui": { - "style": "radio", - "choices": [100, 500], - "grid_search": [100] - } - }, - "learning_rate": { - "description": "Learning rate shrinks the contribution of each tree by learning_rate. 
There is a trade-off between learning_rate and n_estimators.", - "type": "float", - "default": 0.1, - "ui": { - "style": "radio", - "choices": [0.01, 0.1, 1] - } - }, - "max_depth": { - "description": "Maximum tree depth for base learners.", - "type": "int", - "default": 3, - "ui": { - "style": "radio", - "choices": [1, 3, 5, 10] - } - }, - "min_child_weight": { - "description": "Minimum sum of instance weight(hessian) needed in a child.", - "type": "int", - "default": 3, - "ui": { - "style": "radio", - "choices": [1, 3, 5, 10, 20] - } - }, - "subsample": { - "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.", - "type": "float", - "default": 1, - "ui": { - "style": "radio", - "choices": [0.5, 1] - } + "n_estimators": { + "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.", + "type": "int", + "default": 100, + "ui": { + "style": "radio", + "choices": [100, 500], + "grid_search": [100] + } + }, + "learning_rate": { + "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.", + "type": "float", + "default": 0.1, + "ui": { + "style": "radio", + "choices": [0.01, 0.1, 1] + } + }, + "max_depth": { + "description": "Maximum tree depth for base learners.", + "type": "int", + "default": 3, + "ui": { + "style": "radio", + "choices": [1, 3, 5, 10] } + }, + "min_child_weight": { + "description": "Minimum sum of instance weight(hessian) needed in a child.", + "type": "int", + "default": 3, + "ui": { + "style": "radio", + "choices": [1, 3, 5, 10, 20] + } + }, + "subsample": { + "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. 
Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.", + "type": "float", + "default": 1, + "ui": { + "style": "radio", + "choices": [0.5, 1] + } + } }, "category": "classification" -}, -{ + }, + { "name": "KNeighborsClassifier", "path": "sklearn.neighbors", "categorical_encoding_strategy": "OrdinalEncoder", "description": "Nearest-neighbor classifier that classifies new data points based on the most common class among the k nearest data points.", "url": "http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html", "schema": { - "n_neighbors": { - "description": "Number of neighbors to use by default for k_neighbors queries.", - "type": "int", - "default": 5, - "ui": { - "style": "radio", - "choices": [1, 3, 5, 7, 9, 11] - } - }, - "weights": { - "description": "Weight function used in prediction.", - "type": "string", - "default": "uniform", - "ui": { - "style": "radio", - "choices": ["Uniform", "Distance"], - "values": ["uniform", "distance"] - } - }, - "p": { - "description": "Power parameter for the Minkowski metric.", - "type": "int", - "default": 2, - "ui": { - "style": "radio", - "choices": [1, 2] - } + "n_neighbors": { + "description": "Number of neighbors to use by default for k_neighbors queries.", + "type": "int", + "default": 5, + "ui": { + "style": "radio", + "choices": [1, 3, 5, 7, 9, 11] } + }, + "weights": { + "description": "Weight function used in prediction.", + "type": "string", + "default": "uniform", + "ui": { + "style": "radio", + "choices": ["Uniform", "Distance"], + "values": ["uniform", "distance"] + } + }, + "p": { + "description": "Power parameter for the Minkowski metric.", + "type": "int", + "default": 2, + "ui": { + "style": "radio", + "choices": [1, 2] + } + } }, "category": "classification" -}, -{ + }, + { "name": "LinearSVC", "path": "sklearn.svm", "categorical_encoding_strategy": "OneHotEncoder", "description": "Linear Support Vector Classification.", "url": "http://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html", - "invalidParameterCombinations" : [ - [{"penalty":"l2"}, {"loss":"hinge"}, {"dual":"false"}], - [{"penalty":"l1"}, {"loss":"square_hinge"}, {"dual":"true"}], - [{"penalty":"l1"}, {"loss":"hinge"}] + "invalidParameterCombinations": [ + [{ "penalty": "l2" }, { "loss": "hinge" }, { "dual": "false" }], + [{ "penalty": "l1" }, { "loss": "square_hinge" }, { "dual": "true" }], + [{ "penalty": "l1" }, { "loss": "hinge" }] ], "schema": { - "penalty": { - "description": "Specifies the norm used in the penalization. The ‘l2’ penalty is the standard used in SVC. The ‘l1’ leads to coef_ vectors that are sparse.", - "type": "string", - "default": "l2", - "ui": { - "style": "radio", - "choices": ["L1", "L2"], - "values": ["l1", "l2"] - } - }, - "loss": { - "description": "Specifies the loss function. ‘hinge’ is the standard SVM loss (used e.g. by the SVC class) while ‘squared_hinge’ is the square of the hinge loss.", - "type": "string", - "default": "squared_hinge", - "ui": { - "style": "radio", - "choices": ["Hinge", "Squared hinge"], - "values": ["hinge", "squared_hinge"] - } - }, - "dual": { - "description": "Select the algorithm to either solve the dual or primal optimization problem. 
Prefer dual=False when n_samples \u003e n_features.", - "type": "bool", - "default": "true", - "ui": { - "style": "radio", - "choices": ["True", "False"], - "values": ["true", "false"] - } - }, - "tol": { - "description": "Tolerance for stopping criteria.", - "type": "float", - "default": 0.0001, - "ui": { - "style": "radio", - "choices": [1e-05, 0.0001, 0.001, 0.01, 0.1] - } - }, - "C": { - "description": "Penalty parameter C of the error term.", - "type": "float", - "default": 1, - "ui": { - "style": "radio", - "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100] - } + "penalty": { + "description": "Specifies the norm used in the penalization. The ‘l2’ penalty is the standard used in SVC. The ‘l1’ leads to coef_ vectors that are sparse.", + "type": "string", + "default": "l2", + "ui": { + "style": "radio", + "choices": ["L1", "L2"], + "values": ["l1", "l2"] } + }, + "loss": { + "description": "Specifies the loss function. ‘hinge’ is the standard SVM loss (used e.g. by the SVC class) while ‘squared_hinge’ is the square of the hinge loss.", + "type": "string", + "default": "squared_hinge", + "ui": { + "style": "radio", + "choices": ["Hinge", "Squared hinge"], + "values": ["hinge", "squared_hinge"] + } + }, + "dual": { + "description": "Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples \u003e n_features.", + "type": "bool", + "default": "true", + "ui": { + "style": "radio", + "choices": ["True", "False"], + "values": ["true", "false"] + } + }, + "tol": { + "description": "Tolerance for stopping criteria.", + "type": "float", + "default": 0.0001, + "ui": { + "style": "radio", + "choices": [1e-5, 0.0001, 0.001, 0.01, 0.1] + } + }, + "C": { + "description": "Penalty parameter C of the error term.", + "type": "float", + "default": 1, + "ui": { + "style": "radio", + "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100] + } + } }, "category": "classification" -}, -{ + }, + { "name": "LogisticRegression", "categorical_encoding_strategy": "OneHotEncoder", "path": "sklearn.linear_model", "description": "Basic logistic regression that makes predictions about the outcome based on a linear combination of the features.", "url": "http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html", - "invalidParameterCombinations" : [ - [{"penalty":"l1"}, {"dual":"true"}] - ], - "static_parameters" : {"solver": "liblinear", "multi_class": "auto"}, + "invalidParameterCombinations": [[{ "penalty": "l1" }, { "dual": "true" }]], + "static_parameters": { "solver": "liblinear", "multi_class": "auto" }, "schema": { - "penalty": { - "description": "Used to specify the norm used in the penalization. The ‘newton-cg’, ‘sag’ and ‘lbfgs’ solvers support only l2 penalties.", - "type": "string", - "default": "l2", - "ui": { - "style": "radio", - "choices": ["L1", "L2"], - "values": ["l1", "l2"] - } - }, - "C": { - "description": "Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization.", - "type": "float", - "default": 1.0, - "ui": { - "style": "radio", - "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100], - "grid_search": [0.0001, 0.01, 0.1, 0.5, 1, 10] - } - }, - "dual": { - "description": "Select the algorithm to either solve the dual or primal optimization problem. 
Prefer dual=False when n_samples \u003e n_features.", - "type": "bool", - "default": "false", - "ui": { - "style": "radio", - "choices": ["True", "False"], - "values": ["true", "false"] - } - }, - "fit_intercept": { - "description": "Fit intercept in addition to feature coefficients.", - "type": "bool", - "default": "true", - "ui": { - "style": "radio", - "choices": ["True", "False"], - "values": ["true", "false"] - } + "penalty": { + "description": "Used to specify the norm used in the penalization. The ‘newton-cg’, ‘sag’ and ‘lbfgs’ solvers support only l2 penalties.", + "type": "string", + "default": "l2", + "ui": { + "style": "radio", + "choices": ["L1", "L2"], + "values": ["l1", "l2"] + } + }, + "C": { + "description": "Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization.", + "type": "float", + "default": 1.0, + "ui": { + "style": "radio", + "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100], + "grid_search": [0.0001, 0.01, 0.1, 0.5, 1, 10] } + }, + "dual": { + "description": "Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples \u003e n_features.", + "type": "bool", + "default": "false", + "ui": { + "style": "radio", + "choices": ["True", "False"], + "values": ["true", "false"] + } + }, + "fit_intercept": { + "description": "Fit intercept in addition to feature coefficients.", + "type": "bool", + "default": "true", + "ui": { + "style": "radio", + "choices": ["True", "False"], + "values": ["true", "false"] + } + } }, "category": "classification" -}, -{ + }, + { "name": "RandomForestClassifier", "path": "sklearn.ensemble", "categorical_encoding_strategy": "OrdinalEncoder", "description": "An ensemble of decision trees that are trained on random sub-samples of the dataset.", "url": "http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html", "schema": { - "n_estimators": { - "description": "The number of trees in the forest.", - "type": "int", - "default": 100, - "ui": { - "style": "radio", - "choices": [100, 500], - "grid_search": [100] - } - }, - "criterion": { - "description": "The function to measure the quality of a split. Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. 
Note: this parameter is tree-specific.", - "type": "string", - "default": "gini", - "ui": { - "style": "radio", - "choices": ["Gini impurity", "Information gain"], - "values": ["gini", "entropy"] - } - }, - "max_features": { - "description": "The number of features to consider when looking for the best split.", - "type": ["int", "float", "string", "none"], - "default": "sqrt", - "ui": { - "style": "radio", - "choices": ["Square root", "Log2"], - "values": ["sqrt", "log2"] - } - }, - "min_samples_split": { - "description": "The minimum number of samples required to split an internal node.", - "type": ["int", "float"], - "default": 2, - "ui": { - "style": "radio", - "choices": [2, 5, 10, 20], - "grid_search": [10, 20] - } - }, - "min_samples_leaf": { - "description": "The minimum number of samples required to be at a leaf node.", - "type": ["int", "float"], - "default": 1, - "ui": { - "style": "radio", - "choices": [1, 5, 10, 20], - "grid_search": [10, 20] - } - }, - "bootstrap": { - "description": "Whether bootstrap samples are used when building trees.", - "type": "bool", - "default": "true", - "ui": { - "style": "radio", - "choices": ["True", "False"], - "values": ["true", "false"] - } - }, - "min_weight_fraction_leaf": { - "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.", - "type": "float", - "default": 0.0, - "ui": { - "style": "radio", - "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45], - "grid_search": [0.0, 0.1, 0.2, 0.3, 0.4] - } + "n_estimators": { + "description": "The number of trees in the forest.", + "type": "int", + "default": 100, + "ui": { + "style": "radio", + "choices": [100, 500], + "grid_search": [100] } + }, + "criterion": { + "description": "The function to measure the quality of a split. Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. 
Note: this parameter is tree-specific.", + "type": "string", + "default": "gini", + "ui": { + "style": "radio", + "choices": ["Gini impurity", "Information gain"], + "values": ["gini", "entropy"] + } + }, + "max_features": { + "description": "The number of features to consider when looking for the best split.", + "type": ["int", "float", "string", "none"], + "default": "sqrt", + "ui": { + "style": "radio", + "choices": ["Square root", "Log2"], + "values": ["sqrt", "log2"] + } + }, + "min_samples_split": { + "description": "The minimum number of samples required to split an internal node.", + "type": ["int", "float"], + "default": 2, + "ui": { + "style": "radio", + "choices": [2, 5, 10, 20], + "grid_search": [10, 20] + } + }, + "min_samples_leaf": { + "description": "The minimum number of samples required to be at a leaf node.", + "type": ["int", "float"], + "default": 1, + "ui": { + "style": "radio", + "choices": [1, 5, 10, 20], + "grid_search": [10, 20] + } + }, + "bootstrap": { + "description": "Whether bootstrap samples are used when building trees.", + "type": "bool", + "default": "true", + "ui": { + "style": "radio", + "choices": ["True", "False"], + "values": ["true", "false"] + } + }, + "min_weight_fraction_leaf": { + "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.", + "type": "float", + "default": 0.0, + "ui": { + "style": "radio", + "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45], + "grid_search": [0.0, 0.1, 0.2, 0.3, 0.4] + } + } }, "category": "classification" -}, -{ + }, + { "name": "SVC", "path": "sklearn.svm", "categorical_encoding_strategy": "OrdinalEncoder", "description": "Kernel-based classifier that maps the data into a high-dimesional space then constructs a hyperplane that maximally separates the classes in that high-dimesional space.", "url": "http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html", - "static_parameters" : {"cache_size": 700, "max_iter": 10000, "probability": true}, + "static_parameters": { + "cache_size": 700, + "max_iter": 10000, + "probability": true + }, "schema": { - "kernel": { - "description": "Specifies the kernel type to be used in the algorithm", - "type": "string", - "default": "rbf", - "ui": { - "style": "radio", - "choices": ["Polynomial", "Radial basis function"], - "values": ["poly", "rbf"] - } - }, - "tol": { - "description": "Tolerance for stopping criteria.", - "type": "float", - "default": 0.0001, - "ui": { - "style": "radio", - "choices": [1e-05, 0.0001, 0.001, 0.01, 0.1] - } - }, - "C": { - "description": "Penalty parameter C of the error term.", - "type": "float", - "default": 1, - "ui": { - "style": "radio", - "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100] - } - }, - "gamma": { - "description": "Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’.", - "type": "float", - "default": 0.01, - "ui": { - "style": "radio", - "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100] - } - }, - "degree": { - "description": "Degree of the 'poly' kernel.", - "type": "int", - "default": 3, - "ui": { - "style": "radio", - "choices": [2, 3 ] - } - }, - "coef0": { - "description": "Independent term in kernel function.", - "type": "float", - "default": 0.0 , - "ui": { - "style": "radio", - "choices": [0.0, 0.0001, 0.001, 0.01, 0.1, 1, 10] - } + "kernel": { + "description": "Specifies the kernel type to be used in the algorithm", + "type": "string", + "default": "rbf", + "ui": { + "style": "radio", + "choices": ["Polynomial", "Radial 
basis function"], + "values": ["poly", "rbf"] } + }, + "tol": { + "description": "Tolerance for stopping criteria.", + "type": "float", + "default": 0.0001, + "ui": { + "style": "radio", + "choices": [1e-5, 0.0001, 0.001, 0.01, 0.1] + } + }, + "C": { + "description": "Penalty parameter C of the error term.", + "type": "float", + "default": 1, + "ui": { + "style": "radio", + "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100] + } + }, + "gamma": { + "description": "Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’.", + "type": "float", + "default": 0.01, + "ui": { + "style": "radio", + "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100] + } + }, + "degree": { + "description": "Degree of the 'poly' kernel.", + "type": "int", + "default": 3, + "ui": { + "style": "radio", + "choices": [2, 3] + } + }, + "coef0": { + "description": "Independent term in kernel function.", + "type": "float", + "default": 0.0, + "ui": { + "style": "radio", + "choices": [0.0, 0.0001, 0.001, 0.01, 0.1, 1, 10] + } + } }, "category": "classification" -}, -{ + }, + { "name": "DecisionTreeRegressor", "path": "sklearn.tree", "categorical_encoding_strategy": "OrdinalEncoder", "description": "A Decision Tree Regressor", "url": "https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html", "schema": { - "criterion": { - "description": "The function to measure the quality of a split. ", - "type": "string", - "default": "mse", - "ui": { - "style": "radio", - "choices": ["Mean Squared Error", "Mean Absolute Error"], - "values": ["mse", "mae"] - } - }, - "max_depth": { - "description": "The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.", - "type": ["int", "none"], - "default": 3, - "ui": { - "style": "radio", - "choices": [3, 5, 10] - } - }, - "min_samples_split": { - "description": "The minimum number of samples required to split an internal node.", - "type": ["int", "float"], - "default": 2, - "ui": { - "style": "radio", - "choices": [2, 5, 10, 20] - } - }, - "min_samples_leaf": { - "description": "The minimum number of samples required to be at a leaf node.", - "type": ["int", "float"], - "default": 1, - "ui": { - "style": "radio", - "choices": [1, 5, 10, 20] - } - }, - "min_weight_fraction_leaf": { - "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.", - "type": "float", - "default": 0.0, - "ui": { - "style": "radio", - "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45] - } - }, - "max_features": { - "description": "The number of features to consider when looking for the best split.", - "type": ["int", "float", "string", "none"], - "default": "sqrt", - "ui": { - "style": "radio", - "choices": ["Square root", "Log2", "None"], - "values": ["sqrt", "log2", "None"] - } + "criterion": { + "description": "The function to measure the quality of a split. ", + "type": "string", + "default": "mse", + "ui": { + "style": "radio", + "choices": ["Mean Squared Error", "Mean Absolute Error"], + "values": ["mse", "mae"] + } + }, + "max_depth": { + "description": "The maximum depth of the tree. 
If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.", + "type": ["int", "none"], + "default": 3, + "ui": { + "style": "radio", + "choices": [3, 5, 10] } + }, + "min_samples_split": { + "description": "The minimum number of samples required to split an internal node.", + "type": ["int", "float"], + "default": 2, + "ui": { + "style": "radio", + "choices": [2, 5, 10, 20] + } + }, + "min_samples_leaf": { + "description": "The minimum number of samples required to be at a leaf node.", + "type": ["int", "float"], + "default": 1, + "ui": { + "style": "radio", + "choices": [1, 5, 10, 20] + } + }, + "min_weight_fraction_leaf": { + "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.", + "type": "float", + "default": 0.0, + "ui": { + "style": "radio", + "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45] + } + }, + "max_features": { + "description": "The number of features to consider when looking for the best split.", + "type": ["int", "float", "string", "none"], + "default": "sqrt", + "ui": { + "style": "radio", + "choices": ["Square root", "Log2", "None"], + "values": ["sqrt", "log2", "None"] + } + } }, "category": "regression" -}, -{ + }, + { "name": "RandomForestRegressor", "path": "sklearn.ensemble", "categorical_encoding_strategy": "OrdinalEncoder", "description": "An ensemble of decision trees that are trained on random sub-samples of the dataset.", "url": "https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html", "schema": { - "n_estimators": { - "description": "The number of trees in the forest.", - "type": "int", - "default": 100, - "ui": { - "style": "radio", - "choices": [100, 500], - "grid_search": [100] - } - }, - "criterion": { - "description": "The function to measure the quality of a split. 
", - "type": "string", - "default": "mse", - "ui": { - "style": "radio", - "choices": ["Mean Squared Error", "Mean Absolute Error"], - "values": ["mse", "mae"] - } - }, - "max_features": { - "description": "The number of features to consider when looking for the best split.", - "type": ["int", "float", "string", "none"], - "default": "sqrt", - "ui": { - "style": "radio", - "choices": ["Square root", "Log2"], - "values": ["sqrt", "log2"] - } - }, - "min_samples_split": { - "description": "The minimum number of samples required to split an internal node.", - "type": ["int", "float"], - "default": 2, - "ui": { - "style": "radio", - "choices": [2, 5, 10, 20], - "grid_search": [10, 20] - } - }, - "min_samples_leaf": { - "description": "The minimum number of samples required to be at a leaf node.", - "type": ["int", "float"], - "default": 1, - "ui": { - "style": "radio", - "choices": [1, 5, 10, 20], - "grid_search": [10, 20] - } - }, - "bootstrap": { - "description": "Whether bootstrap samples are used when building trees.", - "type": "bool", - "default": "true", - "ui": { - "style": "radio", - "choices": ["True", "False"], - "values": ["true", "false"] - } - }, - "min_weight_fraction_leaf": { - "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.", - "type": "float", - "default": 0.0, - "ui": { - "style": "radio", - "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45], - "grid_search": [0.0, 0.1, 0.2, 0.3, 0.4] - } + "n_estimators": { + "description": "The number of trees in the forest.", + "type": "int", + "default": 100, + "ui": { + "style": "radio", + "choices": [100, 500], + "grid_search": [100] + } + }, + "criterion": { + "description": "The function to measure the quality of a split. 
", + "type": "string", + "default": "mse", + "ui": { + "style": "radio", + "choices": ["Mean Squared Error", "Mean Absolute Error"], + "values": ["mse", "mae"] + } + }, + "max_features": { + "description": "The number of features to consider when looking for the best split.", + "type": ["int", "float", "string", "none"], + "default": "sqrt", + "ui": { + "style": "radio", + "choices": ["Square root", "Log2"], + "values": ["sqrt", "log2"] + } + }, + "min_samples_split": { + "description": "The minimum number of samples required to split an internal node.", + "type": ["int", "float"], + "default": 2, + "ui": { + "style": "radio", + "choices": [2, 5, 10, 20], + "grid_search": [10, 20] + } + }, + "min_samples_leaf": { + "description": "The minimum number of samples required to be at a leaf node.", + "type": ["int", "float"], + "default": 1, + "ui": { + "style": "radio", + "choices": [1, 5, 10, 20], + "grid_search": [10, 20] + } + }, + "bootstrap": { + "description": "Whether bootstrap samples are used when building trees.", + "type": "bool", + "default": "true", + "ui": { + "style": "radio", + "choices": ["True", "False"], + "values": ["true", "false"] + } + }, + "min_weight_fraction_leaf": { + "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node.", + "type": "float", + "default": 0.0, + "ui": { + "style": "radio", + "choices": [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45], + "grid_search": [0.0, 0.1, 0.2, 0.3, 0.4] } + } }, "category": "regression" -}, -{ + }, + { "name": "SVR", "path": "sklearn.svm", "categorical_encoding_strategy": "OrdinalEncoder", "description": "Kernel-based regressor that maps the data into a high-dimesional space then constructs a hyperplane that maximally separates the classes in that high-dimesional space.", "url": "http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html", - "static_parameters" : {"cache_size": 700, "max_iter": 10000}, + "static_parameters": { "cache_size": 700, "max_iter": 10000 }, "schema": { - "kernel": { - "description": "Specifies the kernel type to be used in the algorithm", - "type": "string", - "default": "rbf", - "ui": { - "style": "radio", - "choices": ["Polynomial", "Radial basis function"], - "values": ["poly", "rbf"] - } - }, - "tol": { - "description": "Tolerance for stopping criteria.", - "type": "float", - "default": 0.0001, - "ui": { - "style": "radio", - "choices": [1e-05, 0.0001, 0.001, 0.01, 0.1] - } - }, - "C": { - "description": "Penalty parameter C of the error term.", - "type": "float", - "default": 1, - "ui": { - "style": "radio", - "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100] - } - }, - "gamma": { - "description": "Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’.", - "type": "float", - "default": 0.01, - "ui": { - "style": "radio", - "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100] - } - }, - "degree": { - "description": "Degree of the 'poly' kernel.", - "type": "int", - "default": 3, - "ui": { - "style": "radio", - "choices": [2, 3 ] - } - }, - "coef0": { - "description": "Independent term in kernel function.", - "type": "float", - "default": 0.0 , - "ui": { - "style": "radio", - "choices": [0.0, 0.0001, 0.001, 0.01, 0.1, 1, 10] - } + "kernel": { + "description": "Specifies the kernel type to be used in the algorithm", + "type": "string", + "default": "rbf", + "ui": { + "style": "radio", + "choices": ["Polynomial", "Radial basis function"], + "values": ["poly", "rbf"] + } + }, + "tol": { + "description": "Tolerance 
for stopping criteria.", + "type": "float", + "default": 0.0001, + "ui": { + "style": "radio", + "choices": [1e-5, 0.0001, 0.001, 0.01, 0.1] + } + }, + "C": { + "description": "Penalty parameter C of the error term.", + "type": "float", + "default": 1, + "ui": { + "style": "radio", + "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100] + } + }, + "gamma": { + "description": "Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’.", + "type": "float", + "default": 0.01, + "ui": { + "style": "radio", + "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100] + } + }, + "degree": { + "description": "Degree of the 'poly' kernel.", + "type": "int", + "default": 3, + "ui": { + "style": "radio", + "choices": [2, 3] } + }, + "coef0": { + "description": "Independent term in kernel function.", + "type": "float", + "default": 0.0, + "ui": { + "style": "radio", + "choices": [0.0, 0.0001, 0.001, 0.01, 0.1, 1, 10] + } + } }, "category": "regression" -}, -{ + }, + { "name": "KNeighborsRegressor", "path": "sklearn.neighbors", "categorical_encoding_strategy": "OrdinalEncoder", "description": "Nearest-neighbor regressor that classifies new data points based on the most common class among the k nearest data points.", "url": "https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html", "schema": { - "n_neighbors": { - "description": "Number of neighbors to use by default for k_neighbors queries.", - "type": "int", - "default": 5, - "ui": { - "style": "radio", - "choices": [1, 3, 5, 7, 9, 11] - } - }, - "weights": { - "description": "Weight function used in prediction.", - "type": "string", - "default": "uniform", - "ui": { - "style": "radio", - "choices": ["Uniform", "Distance"], - "values": ["uniform", "distance"] - } - }, - "p": { - "description": "Power parameter for the Minkowski metric.", - "type": "int", - "default": 2, - "ui": { - "style": "radio", - "choices": [1, 2] - } + "n_neighbors": { + "description": "Number of neighbors to use by default for k_neighbors queries.", + "type": "int", + "default": 5, + "ui": { + "style": "radio", + "choices": [1, 3, 5, 7, 9, 11] } + }, + "weights": { + "description": "Weight function used in prediction.", + "type": "string", + "default": "uniform", + "ui": { + "style": "radio", + "choices": ["Uniform", "Distance"], + "values": ["uniform", "distance"] + } + }, + "p": { + "description": "Power parameter for the Minkowski metric.", + "type": "int", + "default": 2, + "ui": { + "style": "radio", + "choices": [1, 2] + } + } }, "category": "regression" -}, -{ + }, + { "name": "GradientBoostingRegressor", "path": "sklearn.ensemble", "categorical_encoding_strategy": "OrdinalEncoder", "description": "An ensemble of decision trees that are iteratively trained on the dataset for the optimization of arbitrary differentiable loss functions.", "url": "https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html", "schema": { - "n_estimators": { - "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.", - "type": "int", - "default": 100, - "ui": { - "style": "radio", - "choices": [100, 500], - "grid_search": [100] - } - }, - "learning_rate": { - "description": "Learning rate shrinks the contribution of each tree by learning_rate. 
There is a trade-off between learning_rate and n_estimators.", - "type": "float", - "default": 0.1, - "ui": { - "style": "radio", - "choices": [0.01, 0.1, 1] - } - }, - "max_depth": { - "description": "Maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables.", - "type": ["int", "none"], - "default": 3, - "ui": { - "style": "radio", - "choices": [1, 3, 5, 10] - } - }, - "min_samples_split": { - "description": "The minimum number of samples required to split an internal node.", - "type": ["int", "float"], - "default": 2, - "ui": { - "style": "radio", - "choices": [2, 5, 10, 20], - "grid_search": [10, 20] - } - }, - "min_samples_leaf": { - "description": "The minimum number of samples required to be at a leaf node.", - "type": ["int", "float"], - "default": 1, - "ui": { - "style": "radio", - "choices": [1, 5, 10, 20], - "grid_search": [10, 20] - } - }, - "subsample": { - "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.", - "type": "float", - "default": 1, - "ui": { - "style": "radio", - "choices": [0.5, 1] - } - }, - "max_features": { - "description": "The number of features to consider when looking for the best split.", - "type": ["int", "float", "string", "none"], - "default": "sqrt", - "ui": { - "style": "radio", - "choices": ["Square root", "Log2"], - "values": ["sqrt", "log2"] - } + "n_estimators": { + "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.", + "type": "int", + "default": 100, + "ui": { + "style": "radio", + "choices": [100, 500], + "grid_search": [100] } + }, + "learning_rate": { + "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.", + "type": "float", + "default": 0.1, + "ui": { + "style": "radio", + "choices": [0.01, 0.1, 1] + } + }, + "max_depth": { + "description": "Maximum depth of the individual regression estimators. The maximum depth limits the number of nodes in the tree. Tune this parameter for best performance; the best value depends on the interaction of the input variables.", + "type": ["int", "none"], + "default": 3, + "ui": { + "style": "radio", + "choices": [1, 3, 5, 10] + } + }, + "min_samples_split": { + "description": "The minimum number of samples required to split an internal node.", + "type": ["int", "float"], + "default": 2, + "ui": { + "style": "radio", + "choices": [2, 5, 10, 20], + "grid_search": [10, 20] + } + }, + "min_samples_leaf": { + "description": "The minimum number of samples required to be at a leaf node.", + "type": ["int", "float"], + "default": 1, + "ui": { + "style": "radio", + "choices": [1, 5, 10, 20], + "grid_search": [10, 20] + } + }, + "subsample": { + "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. 
Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.", + "type": "float", + "default": 1, + "ui": { + "style": "radio", + "choices": [0.5, 1] + } + }, + "max_features": { + "description": "The number of features to consider when looking for the best split.", + "type": ["int", "float", "string", "none"], + "default": "sqrt", + "ui": { + "style": "radio", + "choices": ["Square root", "Log2"], + "values": ["sqrt", "log2"] + } + } }, "category": "regression" -}, -{ + }, + { "name": "LassoLarsCV", "categorical_encoding_strategy": "OneHotEncoder", "path": "sklearn.linear_model", "description": "Cross-validated Lasso, using the LARS algorithm.", "url": "https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLarsCV.html", - "static_parameters" : {"max_iter": 10000}, + "static_parameters": { "max_iter": 10000 }, "schema": { - "fit_intercept": { - "description": "Fit intercept in addition to feature coefficients.", - "type": "bool", - "default": "true", - "ui": { - "style": "radio", - "choices": ["True", "False"], - "values": ["true", "false"] - } - }, - "normalize": { - "description": "This parameter is ignored when fit_intercept is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm.", - "type": "bool", - "default": "true", - "ui": { - "style": "radio", - "choices": ["True", "False"], - "values": ["true", "false"] - } + "fit_intercept": { + "description": "Fit intercept in addition to feature coefficients.", + "type": "bool", + "default": "true", + "ui": { + "style": "radio", + "choices": ["True", "False"], + "values": ["true", "false"] } + }, + "normalize": { + "description": "This parameter is ignored when fit_intercept is set to False. 
If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm.", + "type": "bool", + "default": "true", + "ui": { + "style": "radio", + "choices": ["True", "False"], + "values": ["true", "false"] + } + } }, "category": "regression" -}, -{ + }, + { "name": "KernelRidge", "categorical_encoding_strategy": "OneHotEncoder", "path": "sklearn.kernel_ridge", "description": "Kernel ridge regression.", "url": "https://scikit-learn.org/stable/modules/generated/sklearn.kernel_ridge.KernelRidge.html", - "static_parameters" : {"kernel": "rbf"}, + "static_parameters": { "kernel": "rbf" }, "schema": { - "alpha": { - "description": "Small positive values of alpha improve the conditioning of the problem and reduce the variance of the estimates.", - "type": "float", - "default": 1, - "ui": { - "style": "radio", - "choices": [0.001, 0.01, 0.1, 1] - } - }, - "gamma": { - "description": "Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’.", - "type": "float", - "default": 0.01, - "ui": { - "style": "radio", - "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100] - } + "alpha": { + "description": "Small positive values of alpha improve the conditioning of the problem and reduce the variance of the estimates.", + "type": "float", + "default": 1, + "ui": { + "style": "radio", + "choices": [0.001, 0.01, 0.1, 1] } + }, + "gamma": { + "description": "Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’.", + "type": "float", + "default": 0.01, + "ui": { + "style": "radio", + "choices": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100] + } + } }, "category": "regression" -}, -{ + }, + { "name": "XGBRegressor", "path": "xgboost", "categorical_encoding_strategy": "OrdinalEncoder", "description": "eXtreme Gradient Boosting classification", "url": "https://xgboost.readthedocs.io/en/latest/tutorials/model.html", - "static_parameters" : {"objective": "reg:squarederror"}, + "static_parameters": { "objective": "reg:squarederror" }, "schema": { "n_estimators": { - "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.", - "type": "int", - "default": 100, - "ui": { - "style": "radio", - "choices": [100, 500], - "grid_search": [100] - } + "description": "The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.", + "type": "int", + "default": 100, + "ui": { + "style": "radio", + "choices": [100, 500], + "grid_search": [100] + } }, "learning_rate": { - "description": "Learning rate shrinks the contribution of each tree by learning_rate. There is a trade-off between learning_rate and n_estimators.", - "type": "float", - "default": 0.1, - "ui": { - "style": "radio", - "choices": [0.01, 0.1, 1] - } + "description": "Learning rate shrinks the contribution of each tree by learning_rate. 
There is a trade-off between learning_rate and n_estimators.", + "type": "float", + "default": 0.1, + "ui": { + "style": "radio", + "choices": [0.01, 0.1, 1] + } }, "max_depth": { - "description": "Maximum tree depth for base learners.", - "type": "int", - "default": 3, - "ui": { - "style": "radio", - "choices": [1, 3, 5, 10] - } + "description": "Maximum tree depth for base learners.", + "type": "int", + "default": 3, + "ui": { + "style": "radio", + "choices": [1, 3, 5, 10] + } }, "min_child_weight": { - "description": "Minimum sum of instance weight(hessian) needed in a child.", - "type": "int", - "default": 3, - "ui": { - "style": "radio", - "choices": [1, 3, 5, 10, 20] - } + "description": "Minimum sum of instance weight(hessian) needed in a child.", + "type": "int", + "default": 3, + "ui": { + "style": "radio", + "choices": [1, 3, 5, 10, 20] + } }, "subsample": { - "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.", - "type": "float", - "default": 1, - "ui": { - "style": "radio", - "choices": [0.5, 1] - } + "description": "The fraction of samples to be used for fitting the individual base learners. If smaller than 1.0 this results in Stochastic Gradient Boosting. subsample interacts with the parameter n_estimators. Choosing subsample \u003c 1.0 leads to a reduction of variance and an increase in bias.", + "type": "float", + "default": 1, + "ui": { + "style": "radio", + "choices": [0.5, 1] + } } }, "category": "regression" -} + } ] diff --git a/docker/dbmongo/files/users.json b/docker/dbmongo/files/users.json index d7af08576..67c2b25f7 100644 --- a/docker/dbmongo/files/users.json +++ b/docker/dbmongo/files/users.json @@ -1,56 +1,62 @@ [ -{ + { "username": "pennai", "firstname": "Penn", "lastname": "AI", "apikey": "Oed+kIyprDrUq/3oWU5Jpyd22PqhG/CsUvI8oc9l39E=", "roles": ["ai"], - "algorithms": ["DecisionTreeClassifier", - "GradientBoostingClassifier", - "KNeighborsClassifier", - "SVC", - "LogisticRegression", - "RandomForestClassifier", - "DecisionTreeRegressor", - "XGBRegressor", - "SVR", - "KNeighborsRegressor", - "KernelRidge", - "RandomForestRegressor"] -}, -{ - "algorithms": ["DecisionTreeClassifier", - "GradientBoostingClassifier", - "KNeighborsClassifier", - "SVC", - "LogisticRegression", - "RandomForestClassifier", - "DecisionTreeRegressor", - "XGBRegressor", - "SVR", - "KNeighborsRegressor", - "KernelRidge", - "RandomForestRegressor"], + "algorithms": [ + "DecisionTreeClassifier", + "GradientBoostingClassifier", + "KNeighborsClassifier", + "SVC", + "LogisticRegression", + "RandomForestClassifier", + "DecisionTreeRegressor", + "XGBRegressor", + "SVR", + "KNeighborsRegressor", + "KernelRidge", + "RandomForestRegressor" + ] + }, + { + "algorithms": [ + "DecisionTreeClassifier", + "GradientBoostingClassifier", + "KNeighborsClassifier", + "SVC", + "LogisticRegression", + "RandomForestClassifier", + "DecisionTreeRegressor", + "XGBRegressor", + "SVR", + "KNeighborsRegressor", + "KernelRidge", + "RandomForestRegressor" + ], "username": "pmlb", "firstname": "Pmlb", "lastname": "User" -}, -{ + }, + { "username": "testuser", "firstname": "Test", "lastname": "User", "roles": ["admin", "beginner"], - "algorithms": ["DecisionTreeClassifier", - "GradientBoostingClassifier", - "KNeighborsClassifier", - "SVC", - "LogisticRegression", - 
"RandomForestClassifier", - "DecisionTreeRegressor", - "XGBRegressor", - "SVR", - "KNeighborsRegressor", - "KernelRidge", - "RandomForestRegressor"] -} + "algorithms": [ + "DecisionTreeClassifier", + "GradientBoostingClassifier", + "KNeighborsClassifier", + "SVC", + "LogisticRegression", + "RandomForestClassifier", + "DecisionTreeRegressor", + "XGBRegressor", + "SVR", + "KNeighborsRegressor", + "KernelRidge", + "RandomForestRegressor" + ] + } ] diff --git a/machine/learn/driver.py b/machine/learn/driver.py index bf4af9489..89923fa13 100644 --- a/machine/learn/driver.py +++ b/machine/learn/driver.py @@ -71,7 +71,6 @@ def main(args, param_grid={}): print("param_grid_gene") print(param_grid) - # svd 일때 param grid 차있나? generate_results(model=model, @@ -92,15 +91,3 @@ def main(args, param_grid={}): args, param_grid = parse_args() main(args, param_grid) - - - - - - - # # args - # args= {'method': 'DecisionTreeClassifier', '_id': '631a1ca11b74ba0031813fbd', 'grid_search': False, 'criterion': 'gini', 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 1, 'min_weight_fraction_leaf': 0.0, 'max_features': None} - # # param_grid - # param_grid = {'n_estimators': [100], 'learning_rate': [0.01, 0.1, 1.0], 'max_depth': [1, 3, 5, 10], 'min_child_weight': [1, 3, 5, 10, 20], 'subsample': [0.5, 1.0]} - - # main(args, param_grid)