From 7adb78ebbc8ed0b061e29396d9f271e5d8e9561a Mon Sep 17 00:00:00 2001 From: Najeeb Kazmi Date: Wed, 20 Nov 2019 15:37:54 -0800 Subject: [PATCH 1/3] Improve performance of inferencing with ML.NET models loaded into Pipeline --- src/python/nimbusml/pipeline.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/python/nimbusml/pipeline.py b/src/python/nimbusml/pipeline.py index 71ee437d..92c4b5e2 100644 --- a/src/python/nimbusml/pipeline.py +++ b/src/python/nimbusml/pipeline.py @@ -2073,13 +2073,9 @@ def _predict(self, X, y=None, isinstance(X, DataFrame) and isinstance(y, (str, tuple))): y = y_temp - is_transformer_chain = False - with ZipFile(self.model) as model_zip: - is_transformer_chain = any('TransformerChain' in item - for item in model_zip.namelist()) - all_nodes = [] - if is_transformer_chain: + if (hasattr(self, '_is_transformer_chain') and + self._is_transformer_chain): inputs = dict([('data', ''), ('transform_model', self.model)]) if isinstance(X, FileDataStream): importtext_node = data_customtextloader( @@ -2617,6 +2613,10 @@ def load_model(self, src): self.model = src self.steps = [] + with ZipFile(self.model) as model_zip: + self._is_transformer_chain = any('TransformerChain' in item + for item in model_zip.namelist()) + def __getstate__(self): odict = {'export_version': 2} From d142639458a75b5419d3525d5c34c6e1b65cfe9d Mon Sep 17 00:00:00 2001 From: "REDMOND\\nakazmi" Date: Thu, 21 Nov 2019 17:05:34 -0800 Subject: [PATCH 2/3] Handle predictions without PredictedLabel column --- src/python/nimbusml/pipeline.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/python/nimbusml/pipeline.py b/src/python/nimbusml/pipeline.py index 92c4b5e2..a114abbd 100644 --- a/src/python/nimbusml/pipeline.py +++ b/src/python/nimbusml/pipeline.py @@ -2167,10 +2167,12 @@ def _predict(self, X, y=None, self._run_time = time.time() - start_time raise e - if is_transformer_chain: + if (hasattr(self, 
'_is_transformer_chain') + and self._is_transformer_chain + and hasattr(out_data, 'PredictedLabel') + and out_data.PredictedLabel.dtype == 'bool'): out_data['PredictedLabel'] = out_data['PredictedLabel']*1 - if y is not None: # We need to fix the schema for ranking metrics if evaltype == 'ranking': From aa10245832950fe7bd3a883bec8e870fc5645636 Mon Sep 17 00:00:00 2001 From: Najeeb Kazmi Date: Fri, 22 Nov 2019 17:23:09 -0800 Subject: [PATCH 3/3] Add small model and test --- .../nimbusml/tests/pipeline/test_load_save.py | 10 +++++++++- .../tests/utils/models/UciAdultMlNetModel.zip | Bin 0 -> 3653 bytes 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 src/python/nimbusml/tests/utils/models/UciAdultMlNetModel.zip diff --git a/src/python/nimbusml/tests/pipeline/test_load_save.py b/src/python/nimbusml/tests/pipeline/test_load_save.py index 3807507e..9bb29a14 100644 --- a/src/python/nimbusml/tests/pipeline/test_load_save.py +++ b/src/python/nimbusml/tests/pipeline/test_load_save.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from nimbusml import Pipeline +from nimbusml import FileDataStream, Pipeline from nimbusml.datasets import get_dataset from nimbusml.feature_extraction.categorical import OneHotVectorizer from nimbusml.linear_model import FastLinearBinaryClassifier, OnlineGradientDescentRegressor @@ -33,6 +33,8 @@ (train, label) = get_X_y(train_file, label_column, sep=',') (test, test_label) = get_X_y(test_file, label_column, sep=',') +mlnet_model = os.path.join(os.path.dirname(__file__), '..', 'utils', 'models', 'UciAdultMlNetModel.zip') + def get_temp_file(suffix=None): fd, file_name = tempfile.mkstemp(suffix=suffix) fl = os.fdopen(fd, 'w') @@ -148,6 +150,12 @@ def test_model_datastream(self): os.remove(model_filename) + def test_mlnet_model_can_be_scored(self): + data = FileDataStream.read_csv(test_file, sep=',', numeric_dtype=np.float32) + model = Pipeline() + model.load_model(mlnet_model) + model.predict(data) + def 
test_pipeline_saves_complete_model_file_when_pickled(self): model_nimbusml = Pipeline( steps=[ diff --git a/src/python/nimbusml/tests/utils/models/UciAdultMlNetModel.zip b/src/python/nimbusml/tests/utils/models/UciAdultMlNetModel.zip new file mode 100644 index 0000000000000000000000000000000000000000..6bb4318c8b95efc287e9ff7605de39854df85efa GIT binary patch literal 3653 zcmbVP2|Uw(A775iZA{GyV>9B*nWKqKqvQ^eRF0H%P^LugN~PwO(3Gwx z9*SIDa^xyk&(G8IwCvUO`fsn__S^P)y+5D#`}_VJv$y8tf`UMxwV+G;LhT@EW3E)- zQv-0pfXj(SBvZ%~e=CX~)x*V?7DA>{)Itx09&g78H6oy(&X>)G6&vy<@*N7?^YhUW z9P@dJZiRVu3Aww{`WJJD49hk-MeeFB*g1gv*i#XBVK#GMmS((*IwuW1hk<2{+SFEz z5GFS1Q{4~|9Tv$l4sHgij-nL$s1@p{jNg`s9W-Xb#etfNoY2Dkh)Ry_X5;lm?r~!x z)25^}`#qx(RbO<=Ts`eqaga|x^6&|ed;b~`NZ}g?N{Ano7UWAa+YQk107!lN!c#*- z!^4*9;~S{9$2Yvx4&bCAz|fQhgDt>P2SvDL3h=68MeX)7n5-w;#5?Ks;F#u2SU8{m zYgu&j!JMPfv-hm92^xIs3b=VVwtMHenplRrhI@wFRZExZ#Q1n0=DD}@KJ<8yL+(_& zl=D=_n8A47-jd1LrtaA6DM-&*-B>MLg?SQZO&b=9BNATQ3>9L$JA#D^i;l>E-5mE6 zDHJNcZt#s=Oj3yQ&?_CG6wMU2XTR{_{Xloipq_aUZN0xdSwHBJ4Z2`je?hGv${c^Q z>5#~U%7AsI&0&dlt95}jAljw?J)j(ap$mqkOO+jl(r6;G@uF>pEEWn42OU9(5D173 zx|Jk8<)ythkzBo2Eoz@3bvAyJE#Ygzc8xPfd&hQ)BCVQido6`UKefSc@sfs-!~DaJ z!>02RC0biKrB1G8TrYU2H2CmxIsSe8d%|E~&0JaLWG8hhbHro8^Q&suPJ>h`byE1W zS6iCcGtXFJIly6dJ3!=C{hd zQWjJ`m&*f(aG<)7ayoh!xdqL)$j?DYwii0p-$q|09r95;TS(eIkz9pQ33Yq1Yy#EM z9PLufSG$D0sx|Ut&uQfXrt8qP``LB!_3ZQ%8qcK-Ng@P@eT%^8#OizjhGgIzsJhZE=-+O~rd=Xl<1lF*3#FlID=cPB%Lkyh6_ z^~xWvFY2T~c&fQ6yRzVIG)y)@5e=N;74)sGae^MRZ#>144zJ5v%wn887GEn@cDtcg z0x}s_-P=NJTc+@_vBpT-P$}W9n{k zH@#21YEyoseFVqZfu`%TQnv!Ie0@ikS zPc_V9D$a}W65_c>Zr7i9dSXX@qm=Q7;&NW3$>9@=shh1W%=;j^=k04Hl0LV%&E~j8 z_cO`d$TQgvVI>P)6Dj8vn$Du{__~-AnRlo8SDWI^E?oT`fcJW!Co27!|9{7!BWbs9 z5HVzSh8WBn;_XH9;xdCtGFw65MH>+~*o{zaCWX3!LZLK_=Ev(RhD`8h<6%urNY65l zGqiVY8CL$wUA-?u6qDjIwDjcbn$HC@^NnTFa(sgT!i$&z}uA*&Hyymmc{PU_SCU)AE1K?c20$0GgRd z^uGPU>v*IQbT0YR0dn4_!H^epTew5pO`#>E=kQxY22r1edHRl_JLR|ERrzhAbRpkH zT&uiGw49S_`Sw>83`(_b4$=c$+hn>|y+hV932hvh+2!_l{oU^9uyZ^*p0R1m&o{O# 
z93Q`BD}uzv@e2@A#b@=lR9b8cY^7OXZO>`ntNVIBxf3h4K}k?tQCq?yxO+Sb3nh&i zN8@1xak2G2kPq~=ZDGx{SI1t8?x7fpwzU{Vx7Hq&qGqH^cKRmIZFi2V>#v#STy2fD zhyEqq2ne&%FW^73#`iL~jPF>HV!6HHIr0DqA~!IE2ukd7K5XIPTJ^Y4r~7@whHx&+ zw2>X#dX+HO`YHodcAw4%}M;0wr$D!8dW#l8g&p#&NJMY*ZgWh@Tm5_WszIUSpx?=lytQSmZ z+ohq?L*_m{H{?hiyzeqA{TPFNqdTrenAVdmmzmA!27WJF(p{U6&B8=1ESY?dX2MTL zv01`@m{8&jFs1^`!NB{^3@?a9qzhdp9+Ws`IJ9OD-rFHS=&oZ-3_ieHiWAv2-uios z3gC%q0>-rufxv4xprBvxpFs2<9>3Z~*-_XxKo&$ZaQl7z`@=rSj>^72u~2&fp?-_{ z!#2f^%D!B%P{ld_fcl$NgFOoSP-aCj|@~vY@i^1d(@gMe=o?CZ#Bc7GkddT;eO)zBknJ=W$#ohRDRx{p#Bnv fz3y2!+