Skip to content

Commit

Permalink
conversion model to json (#36)
Browse files Browse the repository at this point in the history
* v0.1.6-dev

* add conversion model to json : 'param_from_sklearn_model' + corresponding tests

* add new numpy type to be cast to python type

* test if object can be json serialized
  • Loading branch information
Lionel MASSOULARD authored and GitHub Enterprise committed Feb 17, 2020
1 parent bb35933 commit 4f67fd6
Show file tree
Hide file tree
Showing 5 changed files with 300 additions and 121 deletions.
2 changes: 1 addition & 1 deletion aikit/__meta__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Automatic Tool Kit for Machine Learning and Datascience
"""

__version__ = "0.1.5"
__version__ = "0.1.6-dev"

__author__ = "Lionel Massoulard"

Expand Down
11 changes: 2 additions & 9 deletions aikit/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,9 @@ class SpecialModels:

GraphPipeline = "GraphPipeline"
Pipeline = "Pipeline"
ModelsUnion = "ModelsUnion"
FeatureUnion = "FeatureUnion"
ColumnsSelector = "ColumnsSelector"

alls = (GraphPipeline, Pipeline, ModelsUnion, ColumnsSelector)
alls = (GraphPipeline, Pipeline, FeatureUnion, ColumnsSelector)


# In[]
def verif_all():
    """Call each of the enum check functions one after the other.

    The five ``test_*`` callables are not defined in this excerpt; they are
    presumably the self-checks declared earlier in this module (to confirm).
    Nothing is returned.
    """
    test_TypeOfProblem()
    test_TypeOfVariables()
    test_StepCategories()
    test_DataTypes()
    # NOTE(review): the name ends with a double "s" -- confirm it matches the definition.
    test_SpecialModelss()
142 changes: 91 additions & 51 deletions aikit/model_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
"""

import copy
import inspect

from sklearn.base import BaseEstimator
import numpy as np

from aikit.model_registration import DICO_NAME_KLASS
from aikit.enums import SpecialModels
Expand Down Expand Up @@ -158,54 +162,90 @@ def sklearn_model_from_param(param, _copy=True):
return param


# def param_from_sklearn_model(model, _simplify_default = False):
#
# if isinstance(model,Pipeline):
# return (SpecialModels.Pipeline,{"steps":[(name,param_from_model(step)) for name,step in model.steps]})
#
# elif isinstance(model,ModelsUnion):
# return (SpecialModels.ModelsUnion ,{"transformer_list":[(name,param_from_model(step, _simplify_default = _simplify_default)) for name,step in model.transformer_list],
# "n_jobs":model.n_jobs,
# "transformer_weights":model.transformer_weights
# })
#
# elif isinstance(model, GraphPipeline):
# return (SpecialModels.GraphPipeline , {n:param_from_model(p) for n,p in model.models.items() } , model.edges)
#
#
# elif isinstance(model,BaseEstimator) and model.__class__.__name__ in MODEL_REGISTER.dico_name_class:
# if not _simplify_default:
# param_dico = {k:param_from_model(v,_simplify_default = _simplify_default) for k,v in model.get_params().items() }
# else:
# # Experimental
# default_params = _get_default_params(model.__class__)
# param_dico = {}
# for k,v in model.get_params().items():
# if not (k in default_params and v == default_params[k]):
# param_dico[k] = param_from_model(v, _simplify_default = _simplify_default)
#
# return (model.__class__.__name__,param_dico)
# # Ici : peut etre faire un filtre si on a les valeurs par default ?
#
# elif isinstance(model, (dict,OrderedDict)):
# res = model.__class__()
# for k,v in model.items():
# res[k] = param_from_model(v, _simplify_default = _simplify_default)
#
# return res
#
# elif isinstance(model,list):
# return [param_from_model(v,_simplify_default = _simplify_default) for v in model]
#
# elif isinstance(model,tuple):
# return tuple([param_from_model(v,_simplify_default = _simplify_default) for v in model])
#
# elif isinstance(model,(np.int64,np.int32)):
# return int(model)
#
# elif isinstance(model,(np.float64,np.float32)):
# return float(model)
#
# else:
# return model
# In[]
def _is_default_value(value, default):
    """Tell whether ``value`` can safely be treated as the declared ``default``."""
    if default is inspect.Parameter.empty:
        # The constructor argument has no default: there is nothing to simplify.
        return False

    if value is default:
        # Identity covers None, booleans and shared singletons.
        return True

    if type(value) is not type(default):
        # ``True == 1`` and ``1 == 1.0`` hold in Python; dropping such a value
        # would make the model reload with a value of a different type.
        return False

    try:
        return bool(value == default)
    except (TypeError, ValueError):
        # Array-like values have no single truth value: keep the parameter.
        return False


def filtered_get_params(model, simplify_default=True):
    """Return the shallow parameters of ``model``, optionally without the defaults.

    Parameters
    ----------
    model : object
        anything exposing ``get_params(deep=False)`` and whose class
        constructor signature can be inspected (typically a sklearn estimator)

    simplify_default : boolean, default=True
        if True, parameters whose value is the default declared in the class
        constructor are removed from the result

    Returns
    -------
    mapping of the same type as the one returned by ``model.get_params``,
    containing the retained ``name -> value`` pairs
    """
    params = model.get_params(deep=False)
    if not simplify_default:
        return params

    signature = inspect.signature(model.__class__)

    # Same container type as the input so that ordering behaviour is preserved.
    filtered = params.__class__()
    for name, value in params.items():
        declared = signature.parameters.get(name)
        if declared is not None and _is_default_value(value, declared.default):
            continue
        filtered[name] = value

    return filtered


def param_from_sklearn_model(model, simplify_default=True):
    """Convert a sklearn model into its json-like representation.

    The conversion is recursive: estimators become ``(class_name, params)``
    tuples, containers (dict, list, tuple) are rebuilt with converted items and
    numpy scalars are cast to the matching builtin Python type.

    Parameters
    ----------
    model : sklearn.BaseEstimator
        the model to convert (nested values of other types are accepted too)

    simplify_default : boolean, default=True
        if True, the arguments that are identical to the default ones are left
        out of the representation

    Returns
    -------
    model json representation

    Example
    -------
    >>> model = RandomForestClassifier(n_estimators=200)
    >>> param_from_sklearn_model(model)
    ('RandomForestClassifier', {'n_estimators': 200})
    """
    if isinstance(model, BaseEstimator):
        klass_name = model.__class__.__name__
        if klass_name not in DICO_NAME_KLASS._mapping:
            print(f"You'll need to include your class '{klass_name}' into the register to be able to reload it")

        # filtered_get_params returns the plain shallow params when simplify_default is False
        params = filtered_get_params(model, simplify_default=simplify_default)
        param_dico = {
            k: param_from_sklearn_model(v, simplify_default=simplify_default) for k, v in params.items()
        }
        return (klass_name, param_dico)

    if isinstance(model, dict):
        res = model.__class__()  # to keep the same format (dict, OrderedDict)
        for k, v in model.items():
            res[k] = param_from_sklearn_model(v, simplify_default=simplify_default)
        return res

    if isinstance(model, list):
        return [param_from_sklearn_model(v, simplify_default=simplify_default) for v in model]

    if isinstance(model, tuple):
        return tuple(param_from_sklearn_model(v, simplify_default=simplify_default) for v in model)

    if isinstance(model, np.number):
        kind = model.dtype.kind
        if kind in ("i", "u"):
            # signed and unsigned numpy integers both map to the builtin int
            return int(model)
        if kind == "f":
            return float(model)
        # other numeric kinds (e.g. complex) have no json counterpart: left untouched
        return model

    if isinstance(model, np.bool_):
        return bool(model)

    if isinstance(model, np.str_):
        return str(model)

    return model
12 changes: 0 additions & 12 deletions aikit/transformers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1241,18 +1241,6 @@ def target_inverse_transform(self, my):
return np.sign(my) * (np.exp(np.log1p(self.ll * np.abs(my)) / self.ll) - 1)


# def column_iterate(X, type_of_data = None):
# if type_of_data is None:
# type_of_data = get_type(X)
#
# if type_of_data in (DataTypes.DataFrame,DataTypes.NumpyArray):
# for column in X.columns:
# yield column,X[column]
#
# elif type_of_data in (DataTypes.NumpyArray, DataTypes.SparseArray):
# for j in range(X.shape[1]):
# yield j,X[:,j]

# In[]
def _gen_column_iterator(X, type_of_data=None):
""" generic column interator, helper to iterator if the column of a data object """
Expand Down
Loading

0 comments on commit 4f67fd6

Please sign in to comment.