From 9367f73e3dec075a1ee64df47f171d25581629f6 Mon Sep 17 00:00:00 2001
From: "pieths.dev@gmail.com" <PH Schouten>
Date: Wed, 2 Oct 2019 17:01:13 -0700
Subject: [PATCH 1/5] Save predictor_model when pickling a pipeline.

---
 src/python/nimbusml/pipeline.py               | 34 ++++++++++++---
 .../nimbusml/tests/pipeline/test_load_save.py | 43 ++++++++++++++++++-
 2 files changed, 71 insertions(+), 6 deletions(-)
diff --git a/src/python/nimbusml/pipeline.py b/src/python/nimbusml/pipeline.py
index 51c50ea2..899f8088 100644
--- a/src/python/nimbusml/pipeline.py
+++ b/src/python/nimbusml/pipeline.py
@@ -2482,7 +2482,7 @@ def load_model(self, src):
         self.steps = []
 
     def __getstate__(self):
-        odict = {'export_version': 1}
+        odict = {'export_version': 2}
 
         if hasattr(self, 'steps'):
             odict['steps'] = self.steps
@@ -2494,6 +2494,13 @@ def __getstate__(self):
             with open(self.model, "rb") as f:
                 odict['modelbytes'] = f.read()
 
+        if (hasattr(self, 'predictor_model') and 
+            self.predictor_model is not None and
+            os.path.isfile(self.predictor_model)):
+
+            with open(self.predictor_model, "rb") as f:
+                odict['predictor_model_bytes'] = f.read()
+
         return odict
 
     def __setstate__(self, state):
@@ -2501,11 +2508,18 @@ def __setstate__(self, state):
         self.model = None
         self.random_state = None
 
-        for k, v in state.items():
-            if k not in {'modelbytes', 'export_version'}:
-                setattr(self, k, v)
+        if state.get('export_version', 0) == 0:
+            # Pickled pipelines which were created
+            # before export_version was added used
+            # the default implementation which uses
+            # the instance’s __dict__.
+            if 'steps' in state:
+                self.steps = state['steps']
+
+        elif state.get('export_version', 0) in {1, 2}:
+            if 'steps' in state:
+                self.steps = state['steps']
 
-        if state.get('export_version', 0) == 1:
             if 'modelbytes' in state:
                 (fd, modelfile) = tempfile.mkstemp()
                 fl = os.fdopen(fd, "wb")
@@ -2513,6 +2527,16 @@ def __setstate__(self, state):
                 fl.close()
                 self.model = modelfile
 
+            if 'predictor_model_bytes' in state:
+                (fd, modelfile) = tempfile.mkstemp()
+                fl = os.fdopen(fd, "wb")
+                fl.write(state['predictor_model_bytes'])
+                fl.close()
+                self.predictor_model = modelfile
+
+        else:
+            raise ValueError('Pipeline version not supported.')
+
     @trace
     def score(
             self,
diff --git a/src/python/nimbusml/tests/pipeline/test_load_save.py b/src/python/nimbusml/tests/pipeline/test_load_save.py
index fc112fe5..1d514959 100644
--- a/src/python/nimbusml/tests/pipeline/test_load_save.py
+++ b/src/python/nimbusml/tests/pipeline/test_load_save.py
@@ -7,10 +7,13 @@
 import pickle
 import unittest
 
+import numpy as np
+import pandas as pd
+
 from nimbusml import Pipeline
 from nimbusml.datasets import get_dataset
 from nimbusml.feature_extraction.categorical import OneHotVectorizer
-from nimbusml.linear_model import FastLinearBinaryClassifier
+from nimbusml.linear_model import FastLinearBinaryClassifier, OnlineGradientDescentRegressor
 from nimbusml.utils import get_X_y
 from numpy.testing import assert_almost_equal
 
@@ -326,5 +329,43 @@ def test_predictor_loaded_from_zip_has_feature_contributions(self):
 
         os.remove(model_filename)
 
+    def test_pickled_pipeline_with_predictor_model(self):
+        train_data = {'c1': [1, 2, 3, 4], 'c2': [2, 3, 4, 5]}
+        train_df = pd.DataFrame(train_data).astype({'c1': np.float64,
+                                                    'c2': np.float64})
+
+        test_data = {'c1': [1.5, 2.3, 3.7], 'c2': [2.2, 4.9, 2.7]}
+        test_df = pd.DataFrame(test_data).astype({'c1': np.float64,
+                                                  'c2': np.float64})
+
+        # Create predictor model and use it to predict
+        pipeline = Pipeline([OnlineGradientDescentRegressor(label='c2')], random_state=0)
+        pipeline.fit(train_df, output_predictor_model=True)
+        result_1 = pipeline.predict(test_df)
+
+        self.assertTrue(pipeline.model)
+        self.assertTrue(pipeline.predictor_model)
+        self.assertNotEqual(pipeline.model, pipeline.predictor_model)
+
+        pickle_filename = 'nimbusml_model.p'
+        with open(pickle_filename, 'wb') as f:
+            pickle.dump(pipeline, f)
+
+        os.remove(pipeline.model)
+        os.remove(pipeline.predictor_model)
+
+        with open(pickle_filename, "rb") as f:
+            pipeline_pickle = pickle.load(f)
+
+        os.remove(pickle_filename)
+
+        # Load predictor pipeline and score data
+        predictor_pipeline = Pipeline()
+        predictor_pipeline.load_model(pipeline_pickle.predictor_model)
+        result_2 = predictor_pipeline.predict(test_df)
+
+        self.assertTrue(result_1.equals(result_2))
+
+
 if __name__ == '__main__':
     unittest.main()

From b45e855d3f1275d4b00a42af90e8bdec0d7a6101 Mon Sep 17 00:00:00 2001
From: "pieths.dev@gmail.com" <PH Schouten>
Date: Wed, 2 Oct 2019 19:01:31 -0700
Subject: [PATCH 2/5] Add whitespace to restart the ci run because the linux
 builds never completed.

---
 src/python/nimbusml/tests/pipeline/test_load_save.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python/nimbusml/tests/pipeline/test_load_save.py b/src/python/nimbusml/tests/pipeline/test_load_save.py
index 1d514959..19bc26ce 100644
--- a/src/python/nimbusml/tests/pipeline/test_load_save.py
+++ b/src/python/nimbusml/tests/pipeline/test_load_save.py
@@ -338,7 +338,7 @@ def test_pickled_pipeline_with_predictor_model(self):
         test_df = pd.DataFrame(test_data).astype({'c1': np.float64,
                                                   'c2': np.float64})
 
-        # Create predictor model and use it to predict
+        # Create predictor model and use it to predict 
         pipeline = Pipeline([OnlineGradientDescentRegressor(label='c2')], random_state=0)
         pipeline.fit(train_df, output_predictor_model=True)
         result_1 = pipeline.predict(test_df)

From 665f5f579901b9a281cc916a4ccc2db0537ca96a Mon Sep 17 00:00:00 2001
From: "pieths.dev@gmail.com" <PH Schouten>
Date: Wed, 2 Oct 2019 19:27:35 -0700
Subject: [PATCH 3/5] Whitespace change to restart the ci run because a linux
 build failed unusually.

---
 src/python/nimbusml/tests/pipeline/test_load_save.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python/nimbusml/tests/pipeline/test_load_save.py b/src/python/nimbusml/tests/pipeline/test_load_save.py
index 19bc26ce..1d514959 100644
--- a/src/python/nimbusml/tests/pipeline/test_load_save.py
+++ b/src/python/nimbusml/tests/pipeline/test_load_save.py
@@ -338,7 +338,7 @@ def test_pickled_pipeline_with_predictor_model(self):
         test_df = pd.DataFrame(test_data).astype({'c1': np.float64,
                                                   'c2': np.float64})
 
-        # Create predictor model and use it to predict 
+        # Create predictor model and use it to predict
         pipeline = Pipeline([OnlineGradientDescentRegressor(label='c2')], random_state=0)
         pipeline.fit(train_df, output_predictor_model=True)
         result_1 = pipeline.predict(test_df)

From 9a288d3488b21fe8764105645c1bceb67aa3360e Mon Sep 17 00:00:00 2001
From: "pieths.dev@gmail.com" <PH Schouten>
Date: Thu, 3 Oct 2019 09:41:43 -0700
Subject: [PATCH 4/5] Whitespace modification to restart ci build.

---
 src/python/nimbusml/tests/pipeline/test_load_save.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python/nimbusml/tests/pipeline/test_load_save.py b/src/python/nimbusml/tests/pipeline/test_load_save.py
index 1d514959..19bc26ce 100644
--- a/src/python/nimbusml/tests/pipeline/test_load_save.py
+++ b/src/python/nimbusml/tests/pipeline/test_load_save.py
@@ -338,7 +338,7 @@ def test_pickled_pipeline_with_predictor_model(self):
         test_df = pd.DataFrame(test_data).astype({'c1': np.float64,
                                                   'c2': np.float64})
 
-        # Create predictor model and use it to predict
+        # Create predictor model and use it to predict 
         pipeline = Pipeline([OnlineGradientDescentRegressor(label='c2')], random_state=0)
         pipeline.fit(train_df, output_predictor_model=True)
         result_1 = pipeline.predict(test_df)

From 8437cedf7409050ddc344f299a74e2144a5bb548 Mon Sep 17 00:00:00 2001
From: "pieths.dev@gmail.com" <PH Schouten>
Date: Thu, 3 Oct 2019 12:18:16 -0700
Subject: [PATCH 5/5] Remove non-ascii character which was accidentally copy
 and pasted in to comment.

---
 src/python/nimbusml/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python/nimbusml/pipeline.py b/src/python/nimbusml/pipeline.py
index 899f8088..21983d4a 100644
--- a/src/python/nimbusml/pipeline.py
+++ b/src/python/nimbusml/pipeline.py
@@ -2512,7 +2512,7 @@ def __setstate__(self, state):
             # Pickled pipelines which were created
             # before export_version was added used
             # the default implementation which uses
-            # the instance’s __dict__.
+            # the instances __dict__.
             if 'steps' in state:
                 self.steps = state['steps']