Skip to content

Commit

Permalink
built-in models, save sklearn & keras models
Browse files Browse the repository at this point in the history
  • Loading branch information
xinzouMicrosoft committed Aug 1, 2019
1 parent 6bb4c4a commit a26d650
Show file tree
Hide file tree
Showing 15 changed files with 302 additions and 261 deletions.
Empty file.
66 changes: 66 additions & 0 deletions builtin-models/builtin_models/environment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import yaml
import json
from sys import version_info

# Version of the running interpreter as "major.minor.micro"; used to pin
# the python dependency of the generated conda environment.
PYTHON_VERSION = "{major}.{minor}.{micro}".format(major=version_info.major,
minor=version_info.minor,
micro=version_info.micro)
# YAML skeleton that _generate_conda_env parses as the base of the environment.
_conda_header = """\
name: project_environment
channels:
- defaults
"""

# Entries placed in the "pip" section of the generated environment.
# NOTE(review): the extra index points at test.pypi.org - presumably a
# temporary arrangement; confirm before release.
_extra_index_url = "--extra-index-url=https://test.pypi.org/simple"
_alghost_pip = "alghost==0.0.59"
_azureml_defaults_pip = "azureml-defaults"

# Temporary solution, to be removed later: names of the placeholder files
# written by _generate_ilearner_files.
_data_type_file_name = "data_type.json"
_data_ilearner_file_name = "data.ilearner"


def _generate_conda_env(path=None, additional_conda_deps=None, additional_pip_deps=None,
                        additional_conda_channels=None, install_alghost=True, install_azureml=True):
    """Build a conda environment spec and return it or write it to ``path``.

    The environment starts from ``_conda_header`` and always depends on the
    running Python version, ``git`` and ``regex``.

    Args:
        path: If given, the environment is dumped as YAML to this file and
            ``None`` is returned; otherwise the environment dict is returned.
        additional_conda_deps: Extra conda dependencies appended after the
            defaults (ignored when ``None``).
        additional_pip_deps: Extra pip requirements appended after the
            built-in pip entries (ignored when ``None`` or empty).
        additional_conda_channels: Extra channels appended to the header's
            channel list (ignored when ``None``).
        install_alghost: Whether to add the extra index URL and the pinned
            ``alghost`` requirement to the pip section.
        install_azureml: Whether to add ``azureml-defaults`` to the pip
            section.

    Returns:
        The environment dict when ``path`` is ``None``, else ``None``.
    """
    env = yaml.safe_load(_conda_header)
    env["dependencies"] = ["python={}".format(PYTHON_VERSION), "git", "regex"]

    pip_deps = []
    if install_alghost:
        pip_deps += [_extra_index_url, _alghost_pip]
    # Fix: this used to be gated on install_alghost, which made the
    # install_azureml flag a no-op.
    if install_azureml:
        pip_deps.append(_azureml_defaults_pip)
    if additional_pip_deps:
        pip_deps.extend(additional_pip_deps)

    if additional_conda_deps is not None:
        env["dependencies"] += additional_conda_deps
    # The pip section goes last, after all conda dependencies.
    env["dependencies"].append({"pip": pip_deps})
    if additional_conda_channels is not None:
        env["channels"] += additional_conda_channels

    if path is None:
        return env
    with open(path, "w") as out:
        yaml.safe_dump(env, stream=out, default_flow_style=False)
    return None


def _generate_ilearner_files(path):
    """Write the placeholder ``data_type.json`` and ``data.ilearner`` files into ``path``.

    Both files are temporary workarounds (see the inline comments).
    """
    # Dump data_type.json as a workaround until SMT deploys
    data_type_description = {
        "Id": "ILearnerDotNet",
        "Name": "ILearner .NET file",
        "ShortName": "Model",
        "Description": "A .NET serialized ILearner",
        "IsDirectory": False,
        "Owner": "Microsoft Corporation",
        "FileExtension": "ilearner",
        "ContentType": "application/octet-stream",
        "AllowUpload": False,
        "AllowPromotion": False,
        "AllowModelPromotion": True,
        "AuxiliaryFileExtension": None,
        "AuxiliaryContentType": None
    }
    data_type_path = os.path.join(path, _data_type_file_name)
    with open(data_type_path, 'w') as data_type_file:
        data_type_file.write(json.dumps(data_type_description))

    # Dump data.ilearner as a workaround until data type design
    ilearner_path = os.path.join(path, _data_ilearner_file_name)
    with open(ilearner_path, 'w') as ilearner_file:
        ilearner_file.write('{}')
67 changes: 67 additions & 0 deletions builtin-models/builtin_models/keras.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import os
import yaml

from builtin_models.environment import _generate_conda_env
from builtin_models.environment import _generate_ilearner_files

# Flavor identifier; written as the framework name and used as a section key
# in the model spec.
FLAVOR_NAME = "keras"
# Names of the files that save_model writes into the model directory.
model_file_name = "model.h5"
conda_file_name = "conda.yaml"
model_spec_file_name = "model_spec.yml"

def _get_default_conda_env():
    """Return a conda environment dict pinning the installed keras and tensorflow versions."""
    import keras
    import tensorflow as tf

    pinned_requirements = [
        "keras=={}".format(keras.__version__),
        "tensorflow=={}".format(tf.__version__),
    ]
    return _generate_conda_env(additional_pip_deps=pinned_requirements)


def _save_conda_env(path, conda_env=None):
    """Write the conda environment file into directory ``path``.

    ``conda_env`` may be ``None`` (use the default environment), a dict
    (used as is) or anything else, which is opened as a YAML file.
    """
    if isinstance(conda_env, dict):
        env = conda_env
    elif conda_env is None:
        env = _get_default_conda_env()
    else:
        # conda_env is a file
        with open(conda_env, "r") as source:
            env = yaml.safe_load(source)

    target = os.path.join(path, conda_file_name)
    with open(target, "w") as destination:
        yaml.safe_dump(env, stream=destination, default_flow_style=False)


def _save_model_spec(path):
    """Write the model spec YAML (flavor, model file, conda file) into directory ``path``."""
    flavor_section = {'framework': FLAVOR_NAME}
    model_section = {'model_file_path': model_file_name}
    conda_section = {'conda_file_path': conda_file_name}
    spec = {
        'flavor': flavor_section,
        FLAVOR_NAME: model_section,
        'conda': conda_section,
    }

    spec_path = os.path.join(path, model_spec_file_name)
    with open(spec_path, 'w') as spec_file:
        yaml.dump(spec, spec_file, default_flow_style=False)


def load_model_from_local_file(path):
    """Load and return the Keras model stored at ``path`` via ``keras.models.load_model``."""
    # Imported lazily so that importing this module does not require keras.
    import keras.models

    return keras.models.load_model(path)


def save_model(keras_model, path, conda_env=None):
    """Save a Keras model together with its conda env, model spec and placeholder files.

    Args:
        keras_model: Object whose ``save(filepath)`` method writes the model
            file (presumably a Keras model - confirm against callers).
        path: Target directory; created with parents when missing. An empty
            string means the current working directory.
        conda_env: ``None``, a dict or a conda YAML file path; forwarded
            unchanged to ``_save_conda_env``.
    """
    # os.path.join copes with a missing trailing separator, so no "/" is
    # appended by hand (the old append turned an empty path into "/").
    if path:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(path, exist_ok=True)

    keras_model.save(os.path.join(path, model_file_name))
    _save_conda_env(path, conda_env)
    _save_model_spec(path)
    _generate_ilearner_files(path)  # temp solution, to remove later


71 changes: 71 additions & 0 deletions builtin-models/builtin_models/sklearn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
import yaml
import pickle

from builtin_models.environment import _generate_conda_env
from builtin_models.environment import _generate_ilearner_files

# Flavor identifier; written as the framework name and used as a section key
# in the model spec.
FLAVOR_NAME = "sklearn"
# Names of the files that save_model writes into the model directory.
model_file_name = "model.pkl"
conda_file_name = "conda.yaml"
model_spec_file_name = "model_spec.yml"

def _get_default_conda_env():
    """Return a conda environment dict pinning the installed scikit-learn version."""
    import sklearn

    sklearn_requirement = "scikit-learn=={}".format(sklearn.__version__)
    return _generate_conda_env(additional_pip_deps=[sklearn_requirement])


def _save_conda_env(path, conda_env=None):
    """Write the conda environment file into directory ``path``.

    ``conda_env`` may be ``None`` (use the default environment), a dict
    (used as is) or anything else, which is opened as a YAML file.
    """
    if conda_env is None:
        resolved_env = _get_default_conda_env()
    elif isinstance(conda_env, dict):
        resolved_env = conda_env
    else:
        # conda_env is a file
        with open(conda_env, "r") as env_file:
            resolved_env = yaml.safe_load(env_file)

    output_path = os.path.join(path, conda_file_name)
    with open(output_path, "w") as output_file:
        yaml.safe_dump(resolved_env, stream=output_file, default_flow_style=False)


def _save_model_spec(path):
    """Write the model spec YAML (flavor, model file, conda file) into directory ``path``."""
    spec = {}
    spec['flavor'] = {'framework': FLAVOR_NAME}
    spec[FLAVOR_NAME] = {'model_file_path': model_file_name}
    spec['conda'] = {'conda_file_path': conda_file_name}

    destination = os.path.join(path, model_spec_file_name)
    with open(destination, 'w') as out:
        yaml.dump(spec, out, default_flow_style=False)


def _save_model(sklearn_model, path):
    """Pickle ``sklearn_model`` into the model file inside directory ``path``."""
    target = os.path.join(path, model_file_name)
    with open(target, "wb") as model_file:
        pickle.dump(sklearn_model, model_file)


def load_model_from_local_file(path):
    """Unpickle and return the object stored in the file at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, "rb") as model_file:
        model = pickle.load(model_file)
    return model


def save_model(sklearn_model, path, conda_env=None):
    """Save a scikit-learn model together with its conda env, model spec and placeholder files.

    Args:
        sklearn_model: Picklable object to store (presumably a fitted
            scikit-learn estimator - confirm against callers).
        path: Target directory; created with parents when missing. An empty
            string means the current working directory.
        conda_env: ``None``, a dict or a conda YAML file path; forwarded
            unchanged to ``_save_conda_env``.
    """
    # os.path.join copes with a missing trailing separator, so no "/" is
    # appended by hand (the old append turned an empty path into "/").
    if path:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(path, exist_ok=True)

    _save_model(sklearn_model, path)
    _save_conda_env(path, conda_env)
    _save_model_spec(path)
    _generate_ilearner_files(path)  # temp solution, to remove later


12 changes: 12 additions & 0 deletions builtin-models/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from setuptools import setup

# Install with: python setup.py install
setup(
    # Distribution metadata.
    name="builtin_models",
    version="0.0.1",
    description="builtin_models",
    author="Xin Zou",
    license="MIT",
    # Package contents.
    packages=["builtin_models"],
    include_package_data=True,
)
Empty file added builtin-models/test/__init__.py
Empty file.
19 changes: 19 additions & 0 deletions builtin-models/test/builtin_models_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

# Manual smoke test; run with: python -m test.builtin_models_test
# NOTE(review): the input model paths are machine-specific absolute paths.
if __name__ == '__main__':
    # Each flavor module is imported under an explicit alias instead of
    # "import *": both modules export load_model_from_local_file and
    # save_model, so star imports silently shadowed one another.

    # keras test
    from builtin_models import keras as keras_flavor
    print('---keras test---')
    model = keras_flavor.load_model_from_local_file('D:/GIT/CustomModules-migu-NewYamlTest2/dstest/model/keras-mnist/model.h5')
    print('------')
    keras_flavor.save_model(model, "./test/outputModels/keras/")
    print('********')

    # sklearn test
    from builtin_models import sklearn as sklearn_flavor
    print('---sklearn test---')
    model = sklearn_flavor.load_model_from_local_file('D:/GIT/CustomModules-migu-NewYamlTest2/dstest/dstest/sklearn/model/sklearn/model.pkl')
    print('------')
    sklearn_flavor.save_model(model, "./test/outputModels/sklearn/")
    print('********')

31 changes: 2 additions & 29 deletions builtin-score/builtin_score/keras_score_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,10 @@ class KerasScoreModule(object):

def __init__(self, model_path, config):
keras_conf = config["keras"]
serializer = keras_conf.get('serialization_format', 'load_model')
if serializer == 'load_model':
self.model = load_model(os.path.join(model_path, keras_conf[constants.MODEL_FILE_PATH_KEY]))
elif serializer == 'load_weights':
self.load_model_via_weights(model_path, keras_conf)
self.model = load_model(os.path.join(model_path, keras_conf[constants.MODEL_FILE_PATH_KEY]))
print(f"Successfully loaded model from {model_path}")


def run(self, df):
df_output = pd.DataFrame([])
for _, row in df.iterrows():
Expand All @@ -38,31 +35,7 @@ def run(self, df):

return df_output

def load_model_via_weights(self, model_path, config):
model_yaml_file = config.get('model_yaml_file','')
model_json_file = config.get('model_json_file','')
model_data = ''
if model_json_file != '':
import json
from keras.models import model_from_json
with open(os.path.join(model_path, model_json_file), 'r') as f:
model_data = json.load(f)
self.model = model_from_json(model_data)
elif model_yaml_file != '':
import yaml
from keras.models import model_from_yaml
with open(os.path.join(model_path, model_yaml_file), 'r') as f:
model_data = yaml.safe_load(f)
self.model = model_from_yaml(model_data)
else:
raise Exception(f"Unable to load model, config = {config}")

model_weights_file = config.get('model_weights_file','')
if model_weights_file == '':
raise Exception(f"model_weights_file is empty, config = {config}")

self.model.load_weights(os.path.join(model_path, model_weights_file))

def is_image(self, row):
# TO DO:
if(len(row)>100):
Expand Down
15 changes: 3 additions & 12 deletions builtin-score/builtin_score/sklearn_score_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,9 @@ class SklearnScoreModule(object):
def __init__(self, model_path, config):
sklearn_conf = config["sklearn"]
model_file_path = os.path.join(model_path, sklearn_conf[constants.MODEL_FILE_PATH_KEY])
DEFAULT_SERIALIZATION_METHOD = "pickle"
serialization_method = sklearn_conf.get(constants.SERIALIZATION_METHOD_KEY)
if serialization_method is None:
print(f"Using default deserializtion method: {DEFAULT_SERIALIZATION_METHOD}")
serialization_method = pickle
if serialization_method == "joblib":
self.model = joblib.load(model_file_path)
elif serialization_method == "pickle":
with open(model_file_path, "rb") as fp:
self.model = pickle.load(fp)
else:
raise Exception(f"Unrecognized serializtion format {serialization_method}")
with open(model_file_path, "rb") as fp:
self.model = pickle.load(fp)


def run(self, df):
y = self.model.predict(df)
Expand Down
17 changes: 2 additions & 15 deletions dstest/dstest/keras/saved_model_predict_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,12 @@
import pandas as pd
import numpy as np

# This is a placeholder for a Google-internal import.
import tensorflow as tf
from builtin_score.builtin_score_module import *
from builtin_score.keras_score_module import KerasScoreModule
from keras.models import load_model
from keras.datasets import mnist


def load_model_then_predict1(model_spec_file = 'model/keras-mnist/model_spec.yml'):
def load_model_then_predict(model_path = "./model/keras-mnist/"):
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_test = x_test.reshape(x_test.shape[0], -1) # x_test shape [x_test.shape[0], 784]
x_test = x_test[:8] # only pick 8 imgs
Expand All @@ -22,21 +19,11 @@ def load_model_then_predict1(model_spec_file = 'model/keras-mnist/model_spec.yml
df = pd.DataFrame(data=x_test, columns=['img']*784, dtype=np.float64)
df.to_csv('mnist_kera_test_data.csv')

with open(model_spec_file) as fp:
config = yaml.safe_load(fp)

model_path = "./model/keras-mnist/"
keras_model = KerasScoreModule(model_path, config)
result = keras_model.run(df)
print(result)

module = BuiltinScoreModule(model_path)
result = module.run(df)
print('=====buildinScoreModule=======')
print(result)

# python -m dstest.keras.saved_model_predict_test
if __name__ == '__main__':
load_model_then_predict1()
print('\n===============another load model method================\n')
load_model_then_predict1('model/keras-mnist/model_weights_spec.yml')
load_model_then_predict()
Loading

0 comments on commit a26d650

Please sign in to comment.