diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000000..7e9dc486f6
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,19 @@
+# Content
+
+The seldon-core-examples repository provides out-of-the-box example machine learning models to deploy using [seldon-core](https://github.com/SeldonIO/seldon-core). Since seldon-core deploys dockerized versions of your models, the repository also includes wrapping scripts that build docker images of these models, ready to be deployed with seldon-core.
+
+## Wrapping scripts
+
+The repository currently contains two wrapping scripts:
+* wrap-model-in-host : if you are using docker on your machine, this script builds a docker image of your model locally.
+* wrap-model-in-minikube : if you are using minikube, this script builds a docker image of your model directly on your minikube cluster (for usage, see the [seldon-core docs](https://github.com/SeldonIO/seldon-core/blob/master/docs/wrappers/readme.md)).
+
+## Examples
+
+The examples in the "models" folder are out-of-the-box machine learning models packaged as required by the seldon wrappers. Each model folder usually includes a script to create and save the model, a Python model file and a requirements file.
+As an example, we describe the content of the folder "models/sklearn_iris". Check out the [seldon wrappers guidelines](https://github.com/SeldonIO/seldon-core/blob/master/docs/wrappers/readme.md) for more details about packaging models.
+
+* train_iris.py : script to train and save a sklearn iris classifier.
+* IrisClassifier.py : the file used by the seldon wrappers to load and serve your saved model.
+* requirements.txt : a list of packages required by your model.
+* sklearn_iris_deployment.json : a configuration json file used to deploy your model in [seldon-core](https://github.com/SeldonIO/seldon-core#quick-start).
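+
+## Minimal model file
+
+Every model file defines a class named after the file, which loads the saved model in its constructor and exposes a `predict` method (see models/templates/ModelName.py for the template used here). A minimal sketch, with placeholder names:
+
+```python
+class MyModel(object):
+
+    def __init__(self):
+        self.model = None  # load your saved model artefacts here
+
+    def predict(self, X, feature_names):
+        # X holds the feature values sent to the microservice
+        return X
+```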
\ No newline at end of file
diff --git a/examples/models/.keep b/examples/models/.keep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/examples/models/deep_mnist/DeepMnist.py b/examples/models/deep_mnist/DeepMnist.py
new file mode 100644
index 0000000000..657eea5386
--- /dev/null
+++ b/examples/models/deep_mnist/DeepMnist.py
@@ -0,0 +1,18 @@
+import tensorflow as tf
+
+class DeepMnist(object):
+    def __init__(self):
+        self.class_names = ["class:{}".format(str(i)) for i in range(10)]
+        self.sess = tf.Session()
+        saver = tf.train.import_meta_graph("model/deep_mnist_model.meta")
+        saver.restore(self.sess,tf.train.latest_checkpoint("./model/"))
+
+        graph = tf.get_default_graph()
+        self.x = graph.get_tensor_by_name("x:0")
+        self.y = graph.get_tensor_by_name("y:0")
+
+    def predict(self,X,feature_names):
+        predictions = self.sess.run(self.y,feed_dict={self.x:X})
+        return predictions
+
+
diff --git a/examples/models/deep_mnist/contract.json b/examples/models/deep_mnist/contract.json
new file mode 100644
index 0000000000..b62a64e64c
--- /dev/null
+++ b/examples/models/deep_mnist/contract.json
@@ -0,0 +1,22 @@
+{
+    "features":[
+        {
+            "name":"x",
+            "dtype":"FLOAT",
+            "ftype":"continuous",
+            "range":[0,1],
+            "repeat":784
+        }
+    ],
+    "targets":[
+        {
+            "name":"class",
+            "dtype":"FLOAT",
+            "ftype":"continuous",
+            "range":[0,1],
+            "repeat":10
+        }
+    ]
+}
+
+
diff --git a/examples/models/deep_mnist/create_model.py b/examples/models/deep_mnist/create_model.py
new file mode 100644
index 0000000000..2ea52b6aa5
--- /dev/null
+++ b/examples/models/deep_mnist/create_model.py
@@ -0,0 +1,36 @@
+from tensorflow.examples.tutorials.mnist import input_data
+mnist = input_data.read_data_sets("MNIST_data/", one_hot = True)
+import tensorflow as tf
+
+if __name__ == '__main__':
+
+    x = tf.placeholder(tf.float32, [None,784], name="x")
+
+    W = tf.Variable(tf.zeros([784,10]))
+    b = tf.Variable(tf.zeros([10]))
+
+    y = tf.nn.softmax(tf.matmul(x,W) + b, name="y")
+
+    y_ = tf.placeholder(tf.float32, [None, 10])
+
+
+    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
+
+    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
+
+    init = tf.initialize_all_variables()
+
+    sess = tf.Session()
+    sess.run(init)
+
+    for i in range(1000):
+        batch_xs, batch_ys = mnist.train.next_batch(100)
+        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
+
+    correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
+    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
+    print(sess.run(accuracy, feed_dict = {x: mnist.test.images, y_:mnist.test.labels}))
+
+    saver = tf.train.Saver()
+
+    saver.save(sess, "model/deep_mnist_model")
diff --git a/examples/models/deep_mnist/requirements.txt b/examples/models/deep_mnist/requirements.txt
new file mode 100644
index 0000000000..6985bb18a2
--- /dev/null
+++ b/examples/models/deep_mnist/requirements.txt
@@ -0,0 +1 @@
+tensorflow==1.0.1
\ No newline at end of file
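A quick way to exercise the class above outside seldon-core (a sketch, not part of the commit; it assumes the model/ checkpoint directory written by create_model.py is present in the working directory):

```python
import numpy as np
from DeepMnist import DeepMnist

model = DeepMnist()
X = np.random.rand(1, 784)            # one fake digit; contract.json promises values in [0,1]
print(model.predict(X, ["x"] * 784))  # -> shape (1, 10) softmax probabilities
```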
diff --git a/examples/models/h2o_example/H2oModel.py b/examples/models/h2o_example/H2oModel.py
new file mode 100644
index 0000000000..83f2c26748
--- /dev/null
+++ b/examples/models/h2o_example/H2oModel.py
@@ -0,0 +1,35 @@
+import numpy as np
+import pandas as pd
+import h2o
+from h2o.frame import H2OFrame
+
+MODEL_PATH='/microservice/glm_fit1'
+
+def _to_frame(X,features_names):
+    """Create H2OFrame object from lists
+    """
+    return H2OFrame(X,column_names=features_names)
+
+def _from_frame(frame):
+    """Create numpy array with probabilities from H2OFrame object
+    """
+    preds = h2o.as_list(frame,use_pandas=False); preds.pop(0); [r.pop(0) for r in preds]  # drop the header row and the predicted-class column
+    return np.asarray(preds,dtype=np.float)
+
+class H2oModel(object):
+
+    def __init__(self):
+
+        print 'Starting Java virtual machine'
+        h2o.init(nthreads = -1, max_mem_size = 8)
+        print 'Machine started!'
+
+        print 'Loading model from %s...' % MODEL_PATH
+        self.model = h2o.load_model(MODEL_PATH)
+        print 'Model Loaded'
+
+    def predict(self,X,features_names):
+        return _from_frame(self.model.predict(_to_frame(X,features_names)))
+
+
diff --git a/examples/models/h2o_example/h2o_badloans_deployment.json b/examples/models/h2o_example/h2o_badloans_deployment.json
new file mode 100644
index 0000000000..685697a81b
--- /dev/null
+++ b/examples/models/h2o_example/h2o_badloans_deployment.json
@@ -0,0 +1,54 @@
+{
+    "apiVersion": "machinelearning.seldon.io/v1alpha1",
+    "kind": "SeldonDeployment",
+    "metadata": {
+        "labels": {
+            "app": "seldon"
+        },
+        "name": "seldon-deployment-example"
+    },
+    "spec": {
+        "annotations": {
+            "project_name": "Bad loans prediction",
+            "deployment_version": "0.1"
+        },
+        "name": "h2o-bad-loans-deployment",
+        "oauth_key": "oauth-key",
+        "oauth_secret": "oauth-secret",
+        "predictors": [
+            {
+                "componentSpec": {
+                    "spec": {
+                        "containers": [
+                            {
+                                "image": "seldonio/h2omodel:0.1",
+                                "imagePullPolicy": "IfNotPresent",
+                                "name": "h2o-bad-loans-classifier",
+                                "resources": {
+                                    "requests": {
+                                        "memory": "1Mi"
+                                    }
+                                }
+                            }
+                        ],
+                        "terminationGracePeriodSeconds": 20
+                    }
+                },
+                "graph": {
+                    "children": [],
+                    "name": "h2o-bad-loans-classifier",
+                    "endpoint": {
+                        "type" : "REST"
+                    },
+                    "subtype": "MICROSERVICE",
+                    "type": "MODEL"
+                },
+                "name": "h2o-bad-loans-predictor",
+                "replicas": 1,
+                "annotations": {
+                    "predictor_version" : "0.1"
+                }
+            }
+        ]
+    }
+}
diff --git a/examples/models/h2o_example/requirements.txt b/examples/models/h2o_example/requirements.txt
new file mode 100644
index 0000000000..7a07d62fc6
--- /dev/null
+++ b/examples/models/h2o_example/requirements.txt
@@ -0,0 +1,14 @@
+numpy==1.11.2
+pandas==0.18.1
+grpc==0.3.post19
+grpcio==1.1.3
+Flask==0.11.1
+futures
+redis==2.10.5
+
+requests
+tabulate
+scikit-learn
+colorama
+http://h2o-release.s3.amazonaws.com/h2o/rel-wheeler/2/Python/h2o-3.16.0.2-py2.py3-none-any.whl
+
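For reference, a sketch of why `_from_frame` above pops twice: with `use_pandas=False`, `h2o.as_list` returns a header row followed by one row per sample, whose first column is the predicted class (the exact column layout shown here is an assumption based on binomial GLM output):

```python
import numpy as np

rows = [['predict', 'p0', 'p1'],   # header row from h2o.as_list
        ['0', '0.92', '0.08']]     # predicted class, then per-class probabilities
rows.pop(0)                        # drop the header
[r.pop(0) for r in rows]           # drop the class column
print(np.asarray(rows, dtype=np.float))  # [[ 0.92  0.08]]
```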
+""" + +# Load the H2O library and start up the H2O cluter locally on your machine +import h2o +# Import H2O GLM: +from h2o.estimators.glm import H2OGeneralizedLinearEstimator + +if __name__=="__main__": + + # Number of threads, nthreads = -1, means use all cores on your machine + # max_mem_size is the maximum memory (in GB) to allocate to H2O + h2o.init(nthreads = -1, max_mem_size = 8) + + #loan_csv = "/Volumes/H2OTOUR/loan.csv" # modify this for your machine + # Alternatively, you can import the data directly from a URL + loan_csv = "https://raw.githubusercontent.com/h2oai/app-consumer-loan/master/data/loan.csv" + data = h2o.import_file(loan_csv) # 163,987 rows x 15 columns + data['bad_loan'] = data['bad_loan'].asfactor() #encode the binary repsonse as a factor + #data['bad_loan'].levels() #optional: after encoding, this shows the two factor levels, '0' and '1' + + y = 'bad_loan' + x = list(data.columns) + x.remove(y) #remove the response + x.remove('int_rate') #remove the interest rate column because it's correlated with the outcome + + # Initialize the GLM estimator: + # Similar to R's glm() and H2O's R GLM, H2O's GLM has the "family" argument + glm_fit1 = H2OGeneralizedLinearEstimator(family='binomial', model_id='glm_fit1') + glm_fit1.train(x=x, y=y, training_frame=data) + + model_path = h2o.save_model(model=glm_fit1, path="", force=True) diff --git a/examples/models/keras_mnist/MnistClassifier.py b/examples/models/keras_mnist/MnistClassifier.py new file mode 100644 index 0000000000..78cfaf6ede --- /dev/null +++ b/examples/models/keras_mnist/MnistClassifier.py @@ -0,0 +1,14 @@ +from keras.models import load_model + +class MnistClassifier(object): + + def __init__(self): + self.model = load_model('MnistClassifier.h5') + + def predict(self,X,features_names): + assert X.shape[0]>=1, 'wrong shape 0' + if X.shape[0]==784: + X = X.reshape(1,28,28,1) + else: + X = X.reshape(X.shape[0],28,28,1) + return self.model.predict(X) diff --git a/examples/models/keras_mnist/contract.json b/examples/models/keras_mnist/contract.json new file mode 100644 index 0000000000..7034733266 --- /dev/null +++ b/examples/models/keras_mnist/contract.json @@ -0,0 +1,24 @@ +{ + "features":[ + { + "name":"loan_amnt", + "dtype":"STRING", + "ftype":"", + "range":[0,1, + "repeat":784 + } + ], + + + + + "targets":[ + { + "name":"proba", + "dtype":"FLOAT", + "ftype":"continuous", + "values":[0,1], + "repeat":2 + } + ] +} diff --git a/examples/models/keras_mnist/keras_mnist_deployment.json b/examples/models/keras_mnist/keras_mnist_deployment.json new file mode 100644 index 0000000000..5b978c4815 --- /dev/null +++ b/examples/models/keras_mnist/keras_mnist_deployment.json @@ -0,0 +1,54 @@ +{ + "apiVersion": "machinelearning.seldon.io/v1alpha1", + "kind": "SeldonDeployment", + "metadata": { + "labels": { + "app": "seldon" + }, + "name": "seldon-deployment-example" + }, + "spec": { + "annotations": { + "project_name": "Digits classification", + "deployment_version": "0.0" + }, + "name": "keras-mnist-deployment", + "oauth_key": "oauth-key", + "oauth_secret": "oauth-secret", + "predictors": [ + { + "componentSpec": { + "spec": { + "containers": [ + { + "image": "seldonio/mnistclassifier:0.0", + "imagePullPolicy": "IfNotPresent", + "name": "keras-mnist-classifier", + "resources": { + "requests": { + "memory": "1Mi" + } + } + } + ], + "terminationGracePeriodSeconds": 20 + } + }, + "graph": { + "children": [], + "name": "keras-mnist-classifier", + "endpoint": { + "type" : "REST" + }, + "subtype": "MICROSERVICE", + "type": "MODEL" + 
diff --git a/examples/models/keras_mnist/MnistClassifier.py b/examples/models/keras_mnist/MnistClassifier.py
new file mode 100644
index 0000000000..78cfaf6ede
--- /dev/null
+++ b/examples/models/keras_mnist/MnistClassifier.py
@@ -0,0 +1,14 @@
+from keras.models import load_model
+
+class MnistClassifier(object):
+
+    def __init__(self):
+        self.model = load_model('MnistClassifier.h5')
+
+    def predict(self,X,features_names):
+        assert X.shape[0]>=1, 'wrong shape 0'
+        if X.shape[0]==784:
+            X = X.reshape(1,28,28,1)
+        else:
+            X = X.reshape(X.shape[0],28,28,1)
+        return self.model.predict(X)
diff --git a/examples/models/keras_mnist/contract.json b/examples/models/keras_mnist/contract.json
new file mode 100644
index 0000000000..7034733266
--- /dev/null
+++ b/examples/models/keras_mnist/contract.json
@@ -0,0 +1,20 @@
+{
+    "features":[
+        {
+            "name":"x",
+            "dtype":"FLOAT",
+            "ftype":"continuous",
+            "range":[0,1],
+            "repeat":784
+        }
+    ],
+    "targets":[
+        {
+            "name":"proba",
+            "dtype":"FLOAT",
+            "ftype":"continuous",
+            "values":[0,1],
+            "repeat":10
+        }
+    ]
+}
diff --git a/examples/models/keras_mnist/keras_mnist_deployment.json b/examples/models/keras_mnist/keras_mnist_deployment.json
new file mode 100644
index 0000000000..5b978c4815
--- /dev/null
+++ b/examples/models/keras_mnist/keras_mnist_deployment.json
@@ -0,0 +1,54 @@
+{
+    "apiVersion": "machinelearning.seldon.io/v1alpha1",
+    "kind": "SeldonDeployment",
+    "metadata": {
+        "labels": {
+            "app": "seldon"
+        },
+        "name": "seldon-deployment-example"
+    },
+    "spec": {
+        "annotations": {
+            "project_name": "Digits classification",
+            "deployment_version": "0.0"
+        },
+        "name": "keras-mnist-deployment",
+        "oauth_key": "oauth-key",
+        "oauth_secret": "oauth-secret",
+        "predictors": [
+            {
+                "componentSpec": {
+                    "spec": {
+                        "containers": [
+                            {
+                                "image": "seldonio/mnistclassifier:0.0",
+                                "imagePullPolicy": "IfNotPresent",
+                                "name": "keras-mnist-classifier",
+                                "resources": {
+                                    "requests": {
+                                        "memory": "1Mi"
+                                    }
+                                }
+                            }
+                        ],
+                        "terminationGracePeriodSeconds": 20
+                    }
+                },
+                "graph": {
+                    "children": [],
+                    "name": "keras-mnist-classifier",
+                    "endpoint": {
+                        "type" : "REST"
+                    },
+                    "subtype": "MICROSERVICE",
+                    "type": "MODEL"
+                },
+                "name": "keras-mnist-predictor",
+                "replicas": 1,
+                "annotations": {
+                    "predictor_version" : "0.0"
+                }
+            }
+        ]
+    }
+}
diff --git a/examples/models/keras_mnist/requirements.txt b/examples/models/keras_mnist/requirements.txt
new file mode 100644
index 0000000000..09d87edabc
--- /dev/null
+++ b/examples/models/keras_mnist/requirements.txt
@@ -0,0 +1,5 @@
+scikit-learn==0.17.1
+scipy==0.18.1
+keras==2.0.6
+tensorflow==1.0.1
+h5py
\ No newline at end of file
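As with the other models, MnistClassifier can be exercised directly (a sketch, not part of the commit; it assumes MnistClassifier.h5 has been produced by the training script that follows):

```python
import numpy as np
from MnistClassifier import MnistClassifier

clf = MnistClassifier()
single = np.zeros(784)        # one flattened digit -> reshaped to (1, 28, 28, 1)
batch = np.zeros((32, 784))   # a batch -> reshaped to (32, 28, 28, 1)
print(clf.predict(single, None).shape, clf.predict(batch, None).shape)
```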
diff --git a/examples/models/keras_mnist/train_mnist.py b/examples/models/keras_mnist/train_mnist.py
new file mode 100644
index 0000000000..f8c908da45
--- /dev/null
+++ b/examples/models/keras_mnist/train_mnist.py
@@ -0,0 +1,207 @@
+#from sklearn.pipeline import Pipeline
+#from sklearn.base import BaseEstimator, ClassifierMixin
+import numpy as np
+import math
+import datetime
+#from seldon.pipeline import PipelineSaver
+import os
+import tensorflow as tf
+from keras import backend
+from keras.models import Model,load_model
+from keras.layers import Dense,Input
+from keras.layers import Dropout
+from keras.layers import Flatten
+from keras.constraints import maxnorm
+from keras.layers.convolutional import Convolution2D
+from keras.layers.convolutional import MaxPooling2D
+
+from keras.callbacks import TensorBoard
+
+class MnistFfnn(object):
+
+    def __init__(self,
+                 input_shape=(784,),
+                 nb_labels=10,
+                 optimizer='Adam',
+                 run_dir='tensorboardlogs_test'):
+
+        self.model_name='MnistFfnn'
+        self.run_dir=run_dir
+        self.input_shape=input_shape
+        self.nb_labels=nb_labels
+        self.optimizer=optimizer
+        self.build_graph()
+
+    def build_graph(self):
+
+        inp = Input(shape=self.input_shape,name='input_part')
+
+        #keras layers
+        with tf.name_scope('dense_1') as scope:
+            h1 = Dense(256,
+                       activation='relu',
+                       W_constraint=maxnorm(3))(inp)
+            drop1 = Dropout(0.2)(h1)
+
+        with tf.name_scope('dense_2') as scope:
+            h2 = Dense(128,
+                       activation='relu',
+                       W_constraint=maxnorm(3))(drop1)
+            drop2 = Dropout(0.5)(h2)
+
+        out = Dense(self.nb_labels,
+                    activation='softmax')(drop2)
+
+        self.model = Model(inp,out)
+
+        if self.optimizer == 'rmsprop':
+            self.model.compile(loss='categorical_crossentropy',
+                               optimizer='rmsprop',
+                               metrics=['accuracy'])
+        elif self.optimizer == 'Adam':
+            self.model.compile(loss='categorical_crossentropy',
+                               optimizer='Adam',
+                               metrics=['accuracy'])
+
+        print 'graph built'
+
+    def fit(self,X,y=None,
+            X_test=None,y_test=None,
+            batch_size=128,
+            nb_epochs=2,
+            shuffle=True):
+
+        now = datetime.datetime.now()
+        tensorboard_logname = self.run_dir+'/{}_{}'.format(self.model_name,
+                                                           now.strftime('%Y.%m.%d_%H.%M'))
+        tensorboard = TensorBoard(log_dir=tensorboard_logname)
+
+        self.model.fit(X,y,
+                       validation_data=(X_test,y_test),
+                       callbacks=[tensorboard],
+                       batch_size=batch_size,
+                       nb_epoch=nb_epochs,
+                       shuffle = shuffle)
+        return self
+
+    def predict_proba(self,X):
+
+        return self.model.predict(X)  # the functional-API Model has no predict_proba; predict returns the softmax output
+
+    def predict(self, X):
+        probas = self.predict_proba(X)
+        return([[p>0.5 for p in p1] for p1 in probas])
+
+    def score(self, X, y=None):
+        pass
+
+    def get_class_id_map(self):
+        return ["proba"]
+
+class MnistConv(object):
+
+    def __init__(self,
+                 input_shape=(28,28,1),
+                 nb_labels=10,
+                 optimizer='Adam',
+                 run_dir='tensorboardlogs_test',
+                 saved_model_file='MnistClassifier.h5'):
+
+        self.model_name='MnistConv'
+        self.run_dir=run_dir
+        self.input_shape=input_shape
+        self.nb_labels=nb_labels
+        self.optimizer=optimizer
+        self.saved_model_file=saved_model_file
+        self.build_graph()
+
+    def build_graph(self):
+
+        inp = Input(shape=self.input_shape,name='input_part')
+
+        #keras layers
+        with tf.name_scope('conv') as scope:
+            conv = Convolution2D(32, 3, 3,
+                                 input_shape=(32, 32, 3),
+                                 border_mode='same',
+                                 activation='relu',
+                                 W_constraint=maxnorm(3))(inp)
+            drop_conv = Dropout(0.2)(conv)
+            max_pool = MaxPooling2D(pool_size=(2, 2))(drop_conv)
+
+        with tf.name_scope('dense') as scope:
+            flat = Flatten()(max_pool)
+            dense = Dense(128,
+                          activation='relu',
+                          W_constraint=maxnorm(3))(flat)
+            drop_dense = Dropout(0.5)(dense)
+
+        out = Dense(self.nb_labels,
+                    activation='softmax')(drop_dense)
+
+        self.model = Model(inp,out)
+
+        if self.optimizer == 'rmsprop':
+            self.model.compile(loss='categorical_crossentropy',
+                               optimizer='rmsprop',
+                               metrics=['accuracy'])
+        elif self.optimizer == 'Adam':
+            self.model.compile(loss='categorical_crossentropy',
+                               optimizer='Adam',
+                               metrics=['accuracy'])
+
+        print 'graph built'
+
+    def fit(self,X,y=None,
+            X_test=None,y_test=None,
+            batch_size=128,
+            nb_epochs=2,
+            shuffle=True):
+
+        now = datetime.datetime.now()
+        tensorboard_logname = self.run_dir+'/{}_{}'.format(self.model_name,
+                                                           now.strftime('%Y.%m.%d_%H.%M'))
+        tensorboard = TensorBoard(log_dir=tensorboard_logname)
+
+        self.model.fit(X,y,
+                       validation_data=(X_test,y_test),
+                       callbacks=[tensorboard],
+                       batch_size=batch_size,
+                       nb_epoch=nb_epochs,
+                       shuffle = shuffle)
+        #if not os.path.exists('saved_model'):
+        #    os.makedirs('saved_model')
+        self.model.save(self.saved_model_file)
+        return self
+
+    def predict_proba(self,X):
+        return self.model.predict(X)  # the functional-API Model has no predict_proba; predict returns the softmax output
+
+    def predict(self, X):
+        probas = self.predict_proba(X)
+        return([[p>0.5 for p in p1] for p1 in probas])
+
+    def score(self, X, y=None):
+        pass
+
+    def get_class_id_map(self):
+        return ["proba"]
+
+from tensorflow.examples.tutorials.mnist import input_data
+mnist = input_data.read_data_sets('data/MNIST_data', one_hot=True)
+X_train = mnist.train.images
+y_train = mnist.train.labels
+X_test = mnist.test.images
+y_test = mnist.test.labels
+
+X_train = X_train.reshape((len(X_train),28,28,1))
+X_test = X_test.reshape((len(X_test),28,28,1))
+
+def main():
+    mc = MnistConv((28,28,1),10)
+    mc.fit(X_train,y=y_train,
+           X_test=X_test,y_test=y_test)
+
+
+if __name__ == "__main__":
+    main()
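main() above trains only MnistConv; the fully-connected MnistFfnn takes flat 784-dim vectors instead, so training it is a small change (a sketch reusing the arrays already loaded in the script; not part of the commit):

```python
mff = MnistFfnn()  # input_shape defaults to (784,)
mff.fit(X_train.reshape(len(X_train), 784), y=y_train,
        X_test=X_test.reshape(len(X_test), 784), y_test=y_test)
```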
"ftype":"continuous", + "values":[0,1] + } + ] +} diff --git a/examples/models/mean_classifier/model.npy b/examples/models/mean_classifier/model.npy new file mode 100644 index 0000000000..4949db57b1 Binary files /dev/null and b/examples/models/mean_classifier/model.npy differ diff --git a/examples/models/mean_classifier/requirements.txt b/examples/models/mean_classifier/requirements.txt new file mode 100644 index 0000000000..088485729f --- /dev/null +++ b/examples/models/mean_classifier/requirements.txt @@ -0,0 +1,4 @@ +numpy==1.11.2 +scikit-learn==0.17.1 +pandas==0.18.1 +scipy==0.18.1 diff --git a/examples/models/paysim_fraud_detector/FraudDetector.py b/examples/models/paysim_fraud_detector/FraudDetector.py new file mode 100644 index 0000000000..b181c0811b --- /dev/null +++ b/examples/models/paysim_fraud_detector/FraudDetector.py @@ -0,0 +1,14 @@ +import pandas as pd +import numpy as np +from sklearn.externals import joblib +from transformer import transformer + +class FraudDetector(): + + def __init__(self): + + self.p = joblib.load('model_pipeline.sav') + + def predict(self,X,features_names): + print X + return self.p.predict_proba(X) diff --git a/examples/models/paysim_fraud_detector/create_pipeline.py b/examples/models/paysim_fraud_detector/create_pipeline.py new file mode 100644 index 0000000000..a0bea98dec --- /dev/null +++ b/examples/models/paysim_fraud_detector/create_pipeline.py @@ -0,0 +1,107 @@ +import pandas as pd +import numpy as np + +from sklearn.preprocessing import LabelEncoder,MinMaxScaler +from sklearn.ensemble import RandomForestClassifier +from xgboost.sklearn import XGBClassifier +from sklearn.neural_network import MLPClassifier +from sklearn.metrics import confusion_matrix,accuracy_score, precision_recall_curve, precision_score, recall_score,roc_curve,f1_score,roc_auc_score,auc +from sklearn.model_selection import KFold +from sklearn.pipeline import Pipeline +from sklearn.externals import joblib +from keras.utils import to_categorical +from transformer import transformer +import matplotlib +import matplotlib.pyplot as plt +import seaborn as sns +import time +import argparse + +def build_dataset(df,features,split=0.7): + + X = df[features].as_matrix() + y = df.isFraud.as_matrix() + + perm = np.random.permutation(len(X)) + X = X[perm,:] + y = y[perm] + + nb_train = int(split*len(X)) + X_train_val = X[:nb_train,:] + y_train_val = y[:nb_train] + X_test = X[nb_train:,:] + y_test = y[nb_train:] + + print 'X_train_val shape:', X_train_val.shape + print 'y_train_val shape:', y_train_val.shape + print 'X_test shape:', X_test.shape + print 'y_test shape:', y_test.shape + #benchmark accuracy + bm = 1-y.sum()/float(len(y)) + print 'Benchmark accuracy (predicting all 0):', bm + + return X_train_val,X_test,y_train_val,y_test + +def calculate_print_scores(y,preds,proba): + acc_test = accuracy_score(y,preds) + auc_score_test = roc_auc_score(y,proba[:,1]) + f1_test = f1_score(y,preds) + precision = precision_score(y,preds) + recall = recall_score(y,preds) + cm_test = confusion_matrix(y,preds) + bm = 1-y.sum()/float(len(y)) + print 'confusion matrix test:' + print cm_test + print 'benchmark accuracy (predicting all 0):' + print bm + print 'accuracy:' + print acc_test + print 'precision:' + print precision + print 'recall:' + print recall + print 'f1:' + print f1_test + print 'roc auc' + print auc_score_test + +def main(): + #load data + df = pd.read_csv(args.data_path) + print 'Raw data fields', df.columns + print 'Nb os samples', len(df) + print 'Missing values?', 
diff --git a/examples/models/paysim_fraud_detector/create_pipeline.py b/examples/models/paysim_fraud_detector/create_pipeline.py
new file mode 100644
index 0000000000..a0bea98dec
--- /dev/null
+++ b/examples/models/paysim_fraud_detector/create_pipeline.py
@@ -0,0 +1,107 @@
+import pandas as pd
+import numpy as np
+
+from sklearn.preprocessing import LabelEncoder,MinMaxScaler
+from sklearn.ensemble import RandomForestClassifier
+from xgboost.sklearn import XGBClassifier
+from sklearn.neural_network import MLPClassifier
+from sklearn.metrics import confusion_matrix,accuracy_score, precision_recall_curve, precision_score, recall_score,roc_curve,f1_score,roc_auc_score,auc
+from sklearn.model_selection import KFold
+from sklearn.pipeline import Pipeline
+from sklearn.externals import joblib
+from keras.utils import to_categorical
+from transformer import transformer
+import matplotlib
+import matplotlib.pyplot as plt
+import seaborn as sns
+import time
+import argparse
+
+def build_dataset(df,features,split=0.7):
+
+    X = df[features].as_matrix()
+    y = df.isFraud.as_matrix()
+
+    perm = np.random.permutation(len(X))
+    X = X[perm,:]
+    y = y[perm]
+
+    nb_train = int(split*len(X))
+    X_train_val = X[:nb_train,:]
+    y_train_val = y[:nb_train]
+    X_test = X[nb_train:,:]
+    y_test = y[nb_train:]
+
+    print 'X_train_val shape:', X_train_val.shape
+    print 'y_train_val shape:', y_train_val.shape
+    print 'X_test shape:', X_test.shape
+    print 'y_test shape:', y_test.shape
+    #benchmark accuracy
+    bm = 1-y.sum()/float(len(y))
+    print 'Benchmark accuracy (predicting all 0):', bm
+
+    return X_train_val,X_test,y_train_val,y_test
+
+def calculate_print_scores(y,preds,proba):
+    acc_test = accuracy_score(y,preds)
+    auc_score_test = roc_auc_score(y,proba[:,1])
+    f1_test = f1_score(y,preds)
+    precision = precision_score(y,preds)
+    recall = recall_score(y,preds)
+    cm_test = confusion_matrix(y,preds)
+    bm = 1-y.sum()/float(len(y))
+    print 'confusion matrix test:'
+    print cm_test
+    print 'benchmark accuracy (predicting all 0):'
+    print bm
+    print 'accuracy:'
+    print acc_test
+    print 'precision:'
+    print precision
+    print 'recall:'
+    print recall
+    print 'f1:'
+    print f1_test
+    print 'roc auc'
+    print auc_score_test
+
+def main():
+    #load data
+    df = pd.read_csv(args.data_path)
+    print 'Raw data fields', df.columns
+    print 'Nb of samples', len(df)
+    print 'Missing values?', df.isnull().values.any()
+    print df.head()
+
+    features = ['type','amount','oldbalanceOrg','newbalanceOrig']
+    X_train_val,X_test,y_train_val,y_test = build_dataset(df,features)
+    print 'sample X_train:', X_train_val[0]
+    print 'sample X_test', X_test[0]
+    tf = transformer(categorical=True)
+    clf = RandomForestClassifier(n_estimators=50,class_weight='balanced',verbose=1)
+    p = Pipeline([('trans', tf), ('clf', clf)])
+
+    p.fit(X_train_val,y_train_val)
+
+    filename_p = 'model_pipeline.sav'
+    filename_Xtest = '../../explainers/data/paysim_data/test_data/X_test.npy'
+    filename_ytest = '../../explainers/data/paysim_data/test_data/y_test.npy'
+
+    joblib.dump(p, filename_p)
+    np.save(filename_Xtest,X_test)
+    np.save(filename_ytest,y_test)
+
+    p_loaded = joblib.load('model_pipeline.sav')
+    preds_test = p_loaded.predict(X_test)
+    proba_test = p_loaded.predict_proba(X_test)
+
+    print 'fraction of test samples predicted fraudulent:', preds_test.sum()/float(len(preds_test))
+
+    calculate_print_scores(y_test,preds_test,proba_test)
+    print clf.feature_importances_
+if __name__=='__main__':
+    parser = argparse.ArgumentParser(prog="create_paysim_pipeline")
+    parser.add_argument('--data-path',type=str,help='path to data file',required=True)
+    args = parser.parse_args()
+
+    main()
diff --git a/examples/models/paysim_fraud_detector/requirements.txt b/examples/models/paysim_fraud_detector/requirements.txt
new file mode 100644
index 0000000000..bdc400eecf
--- /dev/null
+++ b/examples/models/paysim_fraud_detector/requirements.txt
@@ -0,0 +1,4 @@
+scikit-learn==0.19.0
+scipy==0.18.1
+keras==2.0.5
+tensorflow==1.0.0
diff --git a/examples/models/paysim_fraud_detector/transformer.py b/examples/models/paysim_fraud_detector/transformer.py
new file mode 100644
index 0000000000..ed269b4e30
--- /dev/null
+++ b/examples/models/paysim_fraud_detector/transformer.py
@@ -0,0 +1,37 @@
+from keras.utils import to_categorical
+from sklearn.preprocessing import LabelEncoder,MinMaxScaler
+import numpy as np
+
+class transformer(object):
+
+    def __init__(self,categorical=True):
+        self.le = LabelEncoder()
+        self.scaler = MinMaxScaler()
+        self.categorical = categorical
+
+    def fit(self,X,y=None):
+
+        self.le.fit(X[:,0])
+        X1 = X[:,1:]
+        self.scaler.fit(X1)
+        return self
+
+    def transform(self,X):
+        X0 = self.le.transform(X[:,0])
+        if self.categorical:
+            X0 = to_categorical(X0, num_classes=5)
+        else:
+            X0 = X0.reshape(-1,1)
+        X1 = X[:,1:]
+        X1_scaled = self.scaler.transform(X1)
+        self.X = np.hstack((X0,X1_scaled))
+        print self.X.shape
+        return self.X
+
+    def inverse_transform(self,X):
+
+        X0 = self.le.inverse_transform(X[:,0].astype(int)).reshape(-1,1)
+        X1 = X[:,1:]
+        X1 = self.scaler.inverse_transform(X1)
+
+        return np.hstack((X0,X1))
diff --git a/examples/models/sigmoid_predictor/SigmoidPredictor.py b/examples/models/sigmoid_predictor/SigmoidPredictor.py
new file mode 100644
index 0000000000..9691bb122b
--- /dev/null
+++ b/examples/models/sigmoid_predictor/SigmoidPredictor.py
@@ -0,0 +1,20 @@
+import numpy as np
+from sklearn.neural_network import MLPClassifier
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+class SigmoidPredictor(object):
+
+    def __init__(self):
+
+        nb_samples = 10000
+        X = np.random.normal(size=(nb_samples,10))
+        y = (sigmoid(X[:,0]*X[:,1])>=0.5).astype(int)
+        #y = (sigmoid(X[:,0]*X[:,1]+(X[:,2]*X[:,3]))>=0.5).astype(int)
+
+        self.ffnn = MLPClassifier()
+        self.ffnn.fit(X,y)
+        print "Class", self, "variables", dir(self)
+    def predict(self,X,features_names):
+        return self.ffnn.predict_proba(X)
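Since sigmoid(t) >= 0.5 exactly when t >= 0, the synthetic labels above reduce to the sign of x0*x1, so the MLP is learning the sign of the product of the first two features. A one-line check of that boundary:

```python
import numpy as np
print(1.0 / (1.0 + np.exp(-0.0)))  # 0.5: the decision boundary sits at x0*x1 == 0
```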
diff --git a/examples/models/sigmoid_predictor/contract.json b/examples/models/sigmoid_predictor/contract.json
new file mode 100644
index 0000000000..923650f434
--- /dev/null
+++ b/examples/models/sigmoid_predictor/contract.json
@@ -0,0 +1,19 @@
+{
+    "features":[
+        {
+            "name":"feat",
+            "dtype":"FLOAT",
+            "ftype":"continuous",
+            "range":[-1,1],
+            "repeat":10
+        }
+    ],
+    "targets":[
+        {
+            "name":"proba",
+            "dtype":"FLOAT",
+            "ftype":"continuous",
+            "range":[0,1]
+        }
+    ]
+}
diff --git a/examples/models/sigmoid_predictor/requirements.txt b/examples/models/sigmoid_predictor/requirements.txt
new file mode 100644
index 0000000000..4585b69d2c
--- /dev/null
+++ b/examples/models/sigmoid_predictor/requirements.txt
@@ -0,0 +1,2 @@
+scikit-learn==0.19.0
+scipy==0.18.1
diff --git a/examples/models/sklearn_iris/IrisClassifier.py b/examples/models/sklearn_iris/IrisClassifier.py
new file mode 100644
index 0000000000..6b5e9e3fe2
--- /dev/null
+++ b/examples/models/sklearn_iris/IrisClassifier.py
@@ -0,0 +1,9 @@
+from sklearn.externals import joblib
+
+class IrisClassifier(object):
+
+    def __init__(self):
+        self.model = joblib.load('IrisClassifier.sav')
+
+    def predict(self,X,features_names):
+        return self.model.predict_proba(X)
diff --git a/examples/models/sklearn_iris/requirements.txt b/examples/models/sklearn_iris/requirements.txt
new file mode 100644
index 0000000000..4585b69d2c
--- /dev/null
+++ b/examples/models/sklearn_iris/requirements.txt
@@ -0,0 +1,2 @@
+scikit-learn==0.19.0
+scipy==0.18.1
diff --git a/examples/models/sklearn_iris/sklearn_iris_deployment.json b/examples/models/sklearn_iris/sklearn_iris_deployment.json
new file mode 100644
index 0000000000..92694250d6
--- /dev/null
+++ b/examples/models/sklearn_iris/sklearn_iris_deployment.json
@@ -0,0 +1,54 @@
+{
+    "apiVersion": "machinelearning.seldon.io/v1alpha1",
+    "kind": "SeldonDeployment",
+    "metadata": {
+        "labels": {
+            "app": "seldon"
+        },
+        "name": "seldon-deployment-example"
+    },
+    "spec": {
+        "annotations": {
+            "project_name": "Iris classification",
+            "deployment_version": "0.1"
+        },
+        "name": "sklearn-iris-deployment",
+        "oauth_key": "oauth-key",
+        "oauth_secret": "oauth-secret",
+        "predictors": [
+            {
+                "componentSpec": {
+                    "spec": {
+                        "containers": [
+                            {
+                                "image": "seldonio/irisclassifier:0.1",
+                                "imagePullPolicy": "IfNotPresent",
+                                "name": "sklearn-iris-classifier",
+                                "resources": {
+                                    "requests": {
+                                        "memory": "1Mi"
+                                    }
+                                }
+                            }
+                        ],
+                        "terminationGracePeriodSeconds": 20
+                    }
+                },
+                "graph": {
+                    "children": [],
+                    "name": "sklearn-iris-classifier",
+                    "endpoint": {
+                        "type" : "REST"
+                    },
+                    "subtype": "MICROSERVICE",
+                    "type": "MODEL"
+                },
+                "name": "sklearn-iris-predictor",
+                "replicas": 1,
+                "annotations": {
+                    "predictor_version" : "0.1"
+                }
+            }
+        ]
+    }
+}
diff --git a/examples/models/sklearn_iris/train_iris.py b/examples/models/sklearn_iris/train_iris.py
new file mode 100644
index 0000000000..8403db8bf6
--- /dev/null
+++ b/examples/models/sklearn_iris/train_iris.py
@@ -0,0 +1,25 @@
+import numpy as np
+import os
+from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import Pipeline
+from sklearn.externals import joblib
+from sklearn import datasets
+
+def main():
+    clf = LogisticRegression()
+    p = Pipeline([('clf', clf)])
+    print 'Training model...'
+    p.fit(X, y)
+    print 'Model trained!'
+
+    filename_p = 'IrisClassifier.sav'
+    print 'Saving model in %s' % filename_p
+    joblib.dump(p, filename_p)
+    print 'Model saved!'
+
+if __name__ == "__main__":
+    print 'Loading iris data set...'
+    iris = datasets.load_iris()
+    X, y = iris.data, iris.target
+    print 'Dataset loaded!'
+    main()
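+    # Optional smoke test (a sketch): reload the pipeline that main() just
+    # saved and check it still returns class probabilities.
+    saved_model = joblib.load('IrisClassifier.sav')
+    print 'Reloaded model, sample probabilities: %s' % saved_model.predict_proba(X[:1])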
diff --git a/examples/models/templates/ModelName.py b/examples/models/templates/ModelName.py
new file mode 100644
index 0000000000..009b6a50a4
--- /dev/null
+++ b/examples/models/templates/ModelName.py
@@ -0,0 +1,9 @@
+# from ... import ...  (import the library used to load your saved model)
+
+class ModelName(object):
+
+    def __init__(self):
+        self.model = None  # load your saved model here
+
+    def predict(self,X,features_names):
+        return self.model.predict(X)
diff --git a/examples/models/templates/deployment.json b/examples/models/templates/deployment.json
new file mode 100644
index 0000000000..c1df8668ad
--- /dev/null
+++ b/examples/models/templates/deployment.json
@@ -0,0 +1,54 @@
+{
+    "apiVersion": "machinelearning.seldon.io/v1alpha1",
+    "kind": "SeldonDeployment",
+    "metadata": {
+        "labels": {
+            "app": "seldon"
+        },
+        "name": ""
+    },
+    "spec": {
+        "annotations": {
+            "project_name": "",
+            "deployment_version": ""
+        },
+        "name": "",
+        "oauth_key": "",
+        "oauth_secret": "",
+        "predictors": [
+            {
+                "componentSpec": {
+                    "spec": {
+                        "containers": [
+                            {
+                                "image": "your-docker-image",
+                                "imagePullPolicy": "IfNotPresent",
+                                "name": "",
+                                "resources": {
+                                    "requests": {
+                                        "memory": "<>"
+                                    }
+                                }
+                            }
+                        ],
+                        "terminationGracePeriodSeconds": <>
+                    }
+                },
+                "graph": {
+                    "children": [],
+                    "name": "",
+                    "endpoint": {
+                        "type" : ""  #REST or GRPC
+                    },
+                    "subtype": "MICROSERVICE",
+                    "type": "MODEL"
+                },
+                "name": "",
+                "replicas": <>,  #number of replicas
+                "annotations": {
+                    "predictor_version" : ""
+                }
+            }
+        ]
+    }
+}
diff --git a/examples/models/templates/requirements.txt b/examples/models/templates/requirements.txt
new file mode 100644
index 0000000000..bddfdbe5e9
--- /dev/null
+++ b/examples/models/templates/requirements.txt
@@ -0,0 +1 @@
+### your pip installable package here ###
diff --git a/examples/transformers/mean_transformer/MeanTransformer.py b/examples/transformers/mean_transformer/MeanTransformer.py
new file mode 100644
index 0000000000..60bbbe548e
--- /dev/null
+++ b/examples/transformers/mean_transformer/MeanTransformer.py
@@ -0,0 +1,12 @@
+import numpy as np
+
+class MeanTransformer(object):
+
+    def __init__(self):
+        pass
+
+    def transform_input(self, X, feature_names):
+        X = np.array(X)
+        if X.max() == X.min():
+            return np.zeros_like(X)
+        return (X-X.min())/(X.max()-X.min())
diff --git a/examples/wrap-model-in-host b/examples/wrap-model-in-host
new file mode 100755
index 0000000000..6ee99d5418
--- /dev/null
+++ b/examples/wrap-model-in-host
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+set -o nounset
+set -o errexit
+set -o pipefail
+
+MODEL_DIR=$1
+shift
+WRAP_MODEL_PARAMS="$@"
+
+cd $MODEL_DIR
+BASE_LOCAL_DIR=$(pwd)
+
+BASE_VM_DIR=${BASE_LOCAL_DIR}
+
+set -x
+unset DOCKER_TLS_VERIFY
+unset DOCKER_HOST
+unset DOCKER_CERT_PATH
+unset DOCKER_API_VERSION
+docker run --rm -it \
+    -v ${BASE_VM_DIR}:/work seldonio/core-python-wrapper:0.3 \
+    bash -c "rm -rfv /work/build && cd /wrappers/python && python wrap_model.py /work $WRAP_MODEL_PARAMS && ls -1 /work/build"
+set +x
+cd build && make build_docker_image
+
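+# Example usage (a sketch; everything after the model folder is passed through
+# to the wrapper's wrap_model.py, typically <ModelName> <version> <docker-repo>):
+#   ./wrap-model-in-host models/sklearn_iris IrisClassifier 0.1 seldonio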
diff --git a/examples/wrap-model-in-minikube b/examples/wrap-model-in-minikube
new file mode 100755
index 0000000000..5d1ab43a0e
--- /dev/null
+++ b/examples/wrap-model-in-minikube
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+set -o nounset
+set -o errexit
+set -o pipefail
+
+MODEL_DIR=$1
+shift
+WRAP_MODEL_PARAMS="$@"
+
+cd $MODEL_DIR
+BASE_LOCAL_DIR=$(pwd)
+
+UNAME_S=$(uname -s)
+
+BASE_VM_DIR=UNKNOWN
+if [ ${UNAME_S} = "Darwin" ]; then
+    BASE_VM_DIR=${BASE_LOCAL_DIR}
+fi
+if [ ${UNAME_S} = "Linux" ]; then
+    BASE_VM_DIR=$(echo ${BASE_LOCAL_DIR}|sed -e 's|^/home/|/hosthome/|')
+fi
+
+set -x
+eval $(minikube docker-env)
+docker run --rm -it \
+    -v ${BASE_VM_DIR}:/work seldonio/core-python-wrapper:0.3 \
+    bash -c "rm -rfv /work/build && cd /wrappers/python && python wrap_model.py /work $WRAP_MODEL_PARAMS && ls -1 /work/build"
+set +x
+cd build && make build_docker_image
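+
+# Notes (assumptions based on minikube defaults): `eval $(minikube docker-env)`
+# points the local docker client at the daemon inside the minikube VM, so the
+# image is built where the cluster can use it; the /home -> /hosthome rewrite
+# above mirrors how the default minikube VM mounts the host home directory on
+# Linux.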