Skip to content
This repository has been archived by the owner on Nov 16, 2023. It is now read-only.

Enable EnsembleClassifier and EnsembleRegressor #207

Merged
merged 19 commits into from
Aug 6, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/DotNetBridge/Bridge.cs
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ private static unsafe int GenericExec(EnvironmentBlock* penv, sbyte* psz, int cd
env.ComponentCatalog.RegisterAssembly(typeof(CategoricalCatalog).Assembly); // ML.Transforms
env.ComponentCatalog.RegisterAssembly(typeof(FastTreeRegressionTrainer).Assembly); // ML.FastTree

//env.ComponentCatalog.RegisterAssembly(typeof(EnsembleModelParameters).Assembly); // ML.Ensemble
env.ComponentCatalog.RegisterAssembly(typeof(EnsembleModelParameters).Assembly); // ML.Ensemble
env.ComponentCatalog.RegisterAssembly(typeof(KMeansModelParameters).Assembly); // ML.KMeansClustering
env.ComponentCatalog.RegisterAssembly(typeof(PcaModelParameters).Assembly); // ML.PCA
env.ComponentCatalog.RegisterAssembly(typeof(CVSplit).Assembly); // ML.EntryPoints
Expand Down
37 changes: 34 additions & 3 deletions src/python/nimbusml.pyproj
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@
<Compile Include="nimbusml\ensemble\booster\gbdt.py" />
<Compile Include="nimbusml\ensemble\booster\goss.py" />
<Compile Include="nimbusml\ensemble\booster\__init__.py" />
<Compile Include="nimbusml\ensemble\feature_selector\allfeatureselector.py" />
<Compile Include="nimbusml\ensemble\feature_selector\randomfeatureselector.py" />
<Compile Include="nimbusml\ensemble\feature_selector\__init__.py" />
<Compile Include="nimbusml\ensemble\subset_selector\allinstanceselector.py" />
<Compile Include="nimbusml\ensemble\subset_selector\bootstrapselector.py" />
<Compile Include="nimbusml\ensemble\subset_selector\randompartitionselector.py" />
<Compile Include="nimbusml\ensemble\subset_selector\__init__.py" />
<Compile Include="nimbusml\ensemble\ensembleclassifier.py" />
<Compile Include="nimbusml\ensemble\ensembleregressor.py" />
<Compile Include="nimbusml\ensemble\fastforestbinaryclassifier.py" />
<Compile Include="nimbusml\ensemble\fastforestregressor.py" />
<Compile Include="nimbusml\ensemble\fasttreesbinaryclassifier.py" />
Expand All @@ -68,13 +77,16 @@
<Compile Include="nimbusml\examples\ColumnSelector.py" />
<Compile Include="nimbusml\examples\CountSelector.py" />
<Compile Include="nimbusml\examples\CV.py" />
<Compile Include="nimbusml\examples\EnsembleClassifier.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\AveragedPerceptronBinaryClassifier_infert_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\Binner_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\BootStrapSample_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\CharTokenizer_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\ColumnConcatenator_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\ColumnDuplicator_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\Concat_Drop_Select_Columns_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\EnsembleClassifier_iris_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\EnsembleRegressor_airquality_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\FactorizationMachineBinaryClassifier_infert_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\FastForestBinaryClassifier_infert_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\FastForestRegressor_airquality_df.py" />
Expand Down Expand Up @@ -126,6 +138,7 @@
<Compile Include="nimbusml\examples\examples_from_dataframe\WordEmbedding_df.py" />
<Compile Include="nimbusml\examples\examples_from_dataframe\__init__.py" />
<Compile Include="nimbusml\examples\Exp.py" />
<Compile Include="nimbusml\examples\EnsembleRegressor.py" />
<Compile Include="nimbusml\examples\FactorizationMachineBinaryClassifier.py" />
<Compile Include="nimbusml\examples\FastForestBinaryClassifier.py" />
<Compile Include="nimbusml\examples\FastForestRegressor.py" />
Expand Down Expand Up @@ -212,6 +225,15 @@
<Compile Include="nimbusml\internal\core\ensemble\booster\gbdt.py" />
<Compile Include="nimbusml\internal\core\ensemble\booster\goss.py" />
<Compile Include="nimbusml\internal\core\ensemble\booster\__init__.py" />
<Compile Include="nimbusml\internal\core\ensemble\feature_selector\allfeatureselector.py" />
<Compile Include="nimbusml\internal\core\ensemble\feature_selector\randomfeatureselector.py" />
<Compile Include="nimbusml\internal\core\ensemble\feature_selector\__init__.py" />
<Compile Include="nimbusml\internal\core\ensemble\subset_selector\allinstanceselector.py" />
<Compile Include="nimbusml\internal\core\ensemble\subset_selector\bootstrapselector.py" />
<Compile Include="nimbusml\internal\core\ensemble\subset_selector\randompartitionselector.py" />
<Compile Include="nimbusml\internal\core\ensemble\subset_selector\__init__.py" />
<Compile Include="nimbusml\internal\core\ensemble\ensembleclassifier.py" />
<Compile Include="nimbusml\internal\core\ensemble\ensembleregressor.py" />
<Compile Include="nimbusml\internal\core\ensemble\gambinaryclassifier.py" />
<Compile Include="nimbusml\internal\core\ensemble\gamregressor.py" />
<Compile Include="nimbusml\internal\core\ensemble\lightgbmranker.py" />
Expand Down Expand Up @@ -287,6 +309,8 @@
<Compile Include="nimbusml\internal\entrypoints\timeseriesprocessingentrypoints_ssaforecasting.py" />
<Compile Include="nimbusml\internal\entrypoints\timeseriesprocessingentrypoints_ssaspikedetector.py" />
<Compile Include="nimbusml\internal\entrypoints\trainers_averagedperceptronbinaryclassifier.py" />
<Compile Include="nimbusml\internal\entrypoints\trainers_ensembleclassification.py" />
<Compile Include="nimbusml\internal\entrypoints\trainers_ensembleregression.py" />
<Compile Include="nimbusml\internal\entrypoints\trainers_fastforestbinaryclassifier.py" />
<Compile Include="nimbusml\internal\entrypoints\trainers_fastforestregressor.py" />
<Compile Include="nimbusml\internal\entrypoints\trainers_fasttreebinaryclassifier.py" />
Expand Down Expand Up @@ -411,13 +435,18 @@
<Compile Include="nimbusml\internal\entrypoints\_ensemblemulticlassoutputcombiner_multivoting.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensemblemulticlassoutputcombiner_multiweightedaverage.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensemblemulticlasssubmodelselector_allselectormulticlass.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensemblemulticlasssubmodelselector_bestdiverseselectormulticlass.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensemblemulticlasssubmodelselector_bestperformanceselectormulticlass.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensembleregressiondiversitymeasure_regressiondisagreementdiversitymeasure.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensembleregressionoutputcombiner_average.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensembleregressionoutputcombiner_median.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensembleregressionoutputcombiner_regressionstacking.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensembleregressionsubmodelselector_allselector.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensembleregressionsubmodelselector_bestdiverseselectorregression.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensembleregressionsubmodelselector_bestperformanceregressionselector.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensemblesubsetselector_allinstanceselector.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensemblesubsetselector_bootstrapselector.py" />
<Compile Include="nimbusml\internal\entrypoints\_ensemblesubsetselector_randompartitionselector.py" />
<Compile Include="nimbusml\internal\entrypoints\_fasttreetrainer_fasttreebinaryclassification.py" />
<Compile Include="nimbusml\internal\entrypoints\_fasttreetrainer_fasttreeranking.py" />
<Compile Include="nimbusml\internal\entrypoints\_fasttreetrainer_fasttreeregression.py" />
Expand Down Expand Up @@ -561,9 +590,7 @@
<Compile Include="nimbusml\preprocessing\normalization\minmaxscaler.py" />
<Compile Include="nimbusml\preprocessing\normalization\__init__.py" />
<Compile Include="nimbusml\preprocessing\schema\columnconcatenator.py" />
<Compile Include="nimbusml\preprocessing\schema\columndropper.py">
<SubType>Code</SubType>
</Compile>
<Compile Include="nimbusml\preprocessing\schema\columndropper.py" />
<Compile Include="nimbusml\preprocessing\schema\columnduplicator.py" />
<Compile Include="nimbusml\preprocessing\schema\columnselector.py" />
<Compile Include="nimbusml\preprocessing\schema\typeconverter.py" />
Expand Down Expand Up @@ -751,6 +778,8 @@
<Folder Include="nimbusml\decomposition\" />
<Folder Include="nimbusml\ensemble\" />
<Folder Include="nimbusml\ensemble\booster\" />
<Folder Include="nimbusml\ensemble\feature_selector\" />
<Folder Include="nimbusml\ensemble\subset_selector\" />
<Folder Include="nimbusml\examples\" />
<Folder Include="nimbusml\examples\examples_from_dataframe\" />
<Folder Include="nimbusml\feature_extraction\" />
Expand All @@ -766,6 +795,8 @@
<Folder Include="nimbusml\internal\core\decomposition\" />
<Folder Include="nimbusml\internal\core\ensemble\" />
<Folder Include="nimbusml\internal\core\ensemble\booster\" />
<Folder Include="nimbusml\internal\core\ensemble\feature_selector\" />
<Folder Include="nimbusml\internal\core\ensemble\subset_selector\" />
<Folder Include="nimbusml\internal\core\feature_extraction\" />
<Folder Include="nimbusml\internal\core\feature_extraction\categorical\" />
<Folder Include="nimbusml\internal\core\feature_extraction\image\" />
Expand Down
4 changes: 4 additions & 0 deletions src/python/nimbusml/ensemble/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from .ensembleclassifier import EnsembleClassifier
from .ensembleregressor import EnsembleRegressor
from .fastforestbinaryclassifier import FastForestBinaryClassifier
from .fastforestregressor import FastForestRegressor
from .fasttreesbinaryclassifier import FastTreesBinaryClassifier
Expand All @@ -11,6 +13,8 @@
from .lightgbmregressor import LightGbmRegressor

__all__ = [
'EnsembleClassifier',
'EnsembleRegressor',
'FastForestBinaryClassifier',
'FastForestRegressor',
'FastTreesBinaryClassifier',
Expand Down
127 changes: 127 additions & 0 deletions src/python/nimbusml/ensemble/ensembleclassifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------------------------
# - Generated by tools/entrypoint_compiler.py: do not edit by hand
"""
EnsembleClassifier
"""

__all__ = ["EnsembleClassifier"]


from sklearn.base import ClassifierMixin

from ..base_predictor import BasePredictor
from ..internal.core.ensemble.ensembleclassifier import \
EnsembleClassifier as core
from ..internal.utils.utils import trace
from .feature_selector import AllFeatureSelector
from .subset_selector import BootstrapSelector


class EnsembleClassifier(core, BasePredictor, ClassifierMixin):
    """
    **Description**
        Train multiclass ensemble.

    :param feature: see `Columns </nimbusml/concepts/columns>`_.

    :param label: see `Columns </nimbusml/concepts/columns>`_.

    :param sampling_type: Sampling Type. Defaults to a
        ``BootstrapSelector`` with an ``AllFeatureSelector`` when not
        specified.

    :param num_models: Number of models per batch. If not specified, will
        default to 50 if there is only one base predictor, or the number of
        base predictors otherwise.

    :param sub_model_selector_type: Algorithm to prune the base learners for
        selective Ensemble.

    :param output_combiner: Output combiner.

    :param normalize: If ``Auto``, the choice to normalize depends on the
        preference declared by the algorithm. This is the default choice. If
        ``No``, no normalization is performed. If ``Yes``, normalization always
        performed. If ``Warn``, if normalization is needed by the algorithm, a
        warning message is displayed but normalization is not performed. If
        normalization is performed, a ``MaxMin`` normalizer is used. This
        normalizer preserves sparsity by mapping zero to zero.

    :param caching: Whether trainer should cache input training data.

    :param train_parallel: All the base learners will run asynchronously if the
        value is true.

    :param batch_size: Batch size.

    :param show_metrics: True, if metrics for each model need to be evaluated
        and shown in comparison table. This is done by using validation set if
        available or the training set.

    :param params: Additional arguments sent to compute engine.

    """

    @trace
    def __init__(
            self,
            sampling_type=None,
            num_models=None,
            sub_model_selector_type=None,
            output_combiner=None,
            normalize='Auto',
            caching='Auto',
            train_parallel=False,
            batch_size=-1,
            show_metrics=False,
            feature=None,
            label=None,
            **params):

        # Build the default selector per instance rather than in the
        # signature: a default argument is evaluated once at function
        # definition time, so a BootstrapSelector default object would be
        # shared by every EnsembleClassifier created without an explicit
        # sampling_type, and any mutation of it would leak across
        # instances (and break sklearn clone semantics).
        if sampling_type is None:
            sampling_type = BootstrapSelector(
                feature_selector=AllFeatureSelector())

        if 'feature_column_name' in params:
            raise NameError(
                "'feature_column_name' must be renamed to 'feature'")
        if feature:
            params['feature_column_name'] = feature
        if 'label_column_name' in params:
            raise NameError(
                "'label_column_name' must be renamed to 'label'")
        if label:
            params['label_column_name'] = label
        BasePredictor.__init__(self, type='classifier', **params)
        core.__init__(
            self,
            sampling_type=sampling_type,
            num_models=num_models,
            sub_model_selector_type=sub_model_selector_type,
            output_combiner=output_combiner,
            normalize=normalize,
            caching=caching,
            train_parallel=train_parallel,
            batch_size=batch_size,
            show_metrics=show_metrics,
            **params)
        self.feature = feature
        self.label = label

    @trace
    def predict_proba(self, X, **params):
        '''
        Returns probabilities
        '''
        return self._predict_proba(X, **params)

    @trace
    def decision_function(self, X, **params):
        '''
        Returns score values
        '''
        return self._decision_function(X, **params)

    def get_params(self, deep=False):
        """
        Get the parameters for this operator.
        """
        # Delegate to the core implementation so parameter reporting stays
        # consistent with the generated internal class.
        return core.get_params(self)
113 changes: 113 additions & 0 deletions src/python/nimbusml/ensemble/ensembleregressor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------------------------
# - Generated by tools/entrypoint_compiler.py: do not edit by hand
"""
EnsembleRegressor
"""

__all__ = ["EnsembleRegressor"]


from sklearn.base import RegressorMixin

from ..base_predictor import BasePredictor
from ..internal.core.ensemble.ensembleregressor import \
EnsembleRegressor as core
from ..internal.utils.utils import trace
from .feature_selector import AllFeatureSelector
from .subset_selector import BootstrapSelector


class EnsembleRegressor(core, BasePredictor, RegressorMixin):
    """
    **Description**
        Train regression ensemble.

    :param feature: see `Columns </nimbusml/concepts/columns>`_.

    :param label: see `Columns </nimbusml/concepts/columns>`_.

    :param sampling_type: Sampling Type. Defaults to a
        ``BootstrapSelector`` with an ``AllFeatureSelector`` when not
        specified.

    :param num_models: Number of models per batch. If not specified, will
        default to 50 if there is only one base predictor, or the number of
        base predictors otherwise.

    :param sub_model_selector_type: Algorithm to prune the base learners for
        selective Ensemble.

    :param output_combiner: Output combiner.

    :param normalize: If ``Auto``, the choice to normalize depends on the
        preference declared by the algorithm. This is the default choice. If
        ``No``, no normalization is performed. If ``Yes``, normalization always
        performed. If ``Warn``, if normalization is needed by the algorithm, a
        warning message is displayed but normalization is not performed. If
        normalization is performed, a ``MaxMin`` normalizer is used. This
        normalizer preserves sparsity by mapping zero to zero.

    :param caching: Whether trainer should cache input training data.

    :param train_parallel: All the base learners will run asynchronously if the
        value is true.

    :param batch_size: Batch size.

    :param show_metrics: True, if metrics for each model need to be evaluated
        and shown in comparison table. This is done by using validation set if
        available or the training set.

    :param params: Additional arguments sent to compute engine.

    """

    @trace
    def __init__(
            self,
            sampling_type=None,
            num_models=None,
            sub_model_selector_type=None,
            output_combiner=None,
            normalize='Auto',
            caching='Auto',
            train_parallel=False,
            batch_size=-1,
            show_metrics=False,
            feature=None,
            label=None,
            **params):

        # Build the default selector per instance rather than in the
        # signature: a default argument is evaluated once at function
        # definition time, so a BootstrapSelector default object would be
        # shared by every EnsembleRegressor created without an explicit
        # sampling_type, and any mutation of it would leak across
        # instances (and break sklearn clone semantics).
        if sampling_type is None:
            sampling_type = BootstrapSelector(
                feature_selector=AllFeatureSelector())

        if 'feature_column_name' in params:
            raise NameError(
                "'feature_column_name' must be renamed to 'feature'")
        if feature:
            params['feature_column_name'] = feature
        if 'label_column_name' in params:
            raise NameError(
                "'label_column_name' must be renamed to 'label'")
        if label:
            params['label_column_name'] = label
        BasePredictor.__init__(self, type='regressor', **params)
        core.__init__(
            self,
            sampling_type=sampling_type,
            num_models=num_models,
            sub_model_selector_type=sub_model_selector_type,
            output_combiner=output_combiner,
            normalize=normalize,
            caching=caching,
            train_parallel=train_parallel,
            batch_size=batch_size,
            show_metrics=show_metrics,
            **params)
        self.feature = feature
        self.label = label

    def get_params(self, deep=False):
        """
        Get the parameters for this operator.
        """
        # Delegate to the core implementation so parameter reporting stays
        # consistent with the generated internal class.
        return core.get_params(self)
7 changes: 7 additions & 0 deletions src/python/nimbusml/ensemble/feature_selector/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from .allfeatureselector import AllFeatureSelector
from .randomfeatureselector import RandomFeatureSelector

__all__ = [
'AllFeatureSelector',
'RandomFeatureSelector'
]
Loading