Skip to content
This repository has been archived by the owner on Nov 16, 2023. It is now read-only.

Add support for returning custom values when overriding Pipeline.predict #155

Merged
merged 3 commits into from
Jul 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/python/nimbusml.pyproj
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,7 @@
<Compile Include="nimbusml\tests\ensemble\__init__.py" />
<Compile Include="nimbusml\tests\feature_extraction\text\test_sentiment.py" />
<Compile Include="nimbusml\tests\idv\__init__.py" />
<Compile Include="nimbusml\tests\pipeline\test_pipeline_subclassing.py" />
<Compile Include="nimbusml\tests\preprocessing\normalization\test_meanvariancescaler.py" />
<Compile Include="nimbusml\tests\timeseries\test_iidchangepointdetector.py" />
<Compile Include="nimbusml\tests\timeseries\test_ssachangepointdetector.py" />
Expand Down
4 changes: 2 additions & 2 deletions src/python/nimbusml/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -1839,7 +1839,7 @@ def predict_proba(self, X, verbose=0, **params):
last_node = self.last_node
last_node._check_implements_method('predict_proba')

scores = self.predict(X, verbose, **params)
scores, _ = self._predict(X, verbose=verbose, **params)

# REVIEW: Consider adding an entry point that extracts the
# probability column instead.
Expand Down Expand Up @@ -1883,7 +1883,7 @@ def decision_function(self, X, verbose=0, **params):
last_node = self.last_node
last_node._check_implements_method('decision_function')

scores = self.predict(X, verbose, **params)
scores, _ = self._predict(X, verbose=verbose, **params)

# REVIEW: Consider adding an entry point that extracts the score
# column instead.
Expand Down
73 changes: 73 additions & 0 deletions src/python/nimbusml/tests/pipeline/test_pipeline_subclassing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------------------------
import os
import unittest

import numpy as np
import pandas as pd
from nimbusml import Pipeline
from nimbusml.linear_model import LogisticRegressionBinaryClassifier


def generate_dataset_1():
    """Build the small two-feature binary dataset used by these tests.

    Returns:
        A ``(X, y)`` tuple of pandas DataFrames. ``X`` holds the integer
        feature columns ``x1`` and ``x2`` (eight rows); ``y`` holds a single
        ``y`` column that is 1 for the first four rows and 0 for the last
        four.
    """
    X = pd.DataFrame({'x1': [2, 3, 2, 2, 8, 9, 10, 8],
                      'x2': [1, 2, 3, 1, 7, 10, 9, 8]})
    y = pd.DataFrame({'y': [1, 1, 1, 1, 0, 0, 0, 0]})
    return X, y


class CustomPipeline(Pipeline):
    """``Pipeline`` subclass whose ``predict`` returns a caller-chosen value.

    The tests use it to check that a subclass may override ``predict`` and
    return something other than the usual prediction output.
    """

    # Override the predict method
    def predict(self, X, *args, **kwargs):
        """Return the ``test_return_value`` keyword argument.

        ``X`` and every other argument are ignored and no prediction is run.
        ``None`` is returned when ``test_return_value`` is not supplied.
        """
        return kwargs.get('test_return_value')


class TestPipelineSubclassing(unittest.TestCase):
    """Checks that a ``Pipeline`` subclass can override ``predict``.

    ``CustomPipeline.predict`` returns an arbitrary caller-supplied value, so
    ``predict_proba`` and ``decision_function`` can only match the base
    ``Pipeline`` if they do not depend on the overridden method.
    """

    @staticmethod
    def _fit(pipeline_class):
        """Fit ``pipeline_class`` around a fresh classifier on dataset 1.

        Returns:
            A ``(pipeline, X, y)`` tuple: the fitted pipeline plus the
            features and labels it was fitted on.
        """
        X, y = generate_dataset_1()
        pipeline = pipeline_class([LogisticRegressionBinaryClassifier()])
        pipeline.fit(X, y)
        return pipeline, X, y

    def _assert_arrays_equal(self, expected, actual, what):
        """Fail, showing both arrays, unless they are exactly equal."""
        self.assertTrue(
            np.array_equal(expected, actual),
            msg='{} mismatch: expected {!r}, got {!r}'.format(
                what, expected, actual))

    def _assert_unaffected_by_override(self, method_name):
        """Assert ``method_name`` agrees for ``Pipeline`` and the subclass."""
        # Keep the original order: fit and evaluate the base pipeline first,
        # then fit and evaluate the subclass.
        pipeline, X, _ = self._fit(Pipeline)
        orig_result = getattr(pipeline, method_name)(X)

        pipeline, X, _ = self._fit(CustomPipeline)
        new_result = getattr(pipeline, method_name)(X)

        self._assert_arrays_equal(orig_result, new_result, method_name)

    def test_pipeline_subclass_can_override_predict(self):
        """The base class predicts labels; the override returns its value."""
        pipeline, X, y = self._fit(Pipeline)
        result = pipeline.predict(X)['PredictedLabel']

        self._assert_arrays_equal(y['y'].values, result.values,
                                  'PredictedLabel')

        pipeline, X, _ = self._fit(CustomPipeline)

        self.assertEqual(pipeline.predict(X, test_return_value=3), 3)

    def test_pipeline_subclass_correctly_supports_predict_proba(self):
        """``predict_proba`` matches the base class despite the override."""
        self._assert_unaffected_by_override('predict_proba')

    def test_pipeline_subclass_correctly_supports_decision_function(self):
        """``decision_function`` matches the base class despite the override."""
        self._assert_unaffected_by_override('decision_function')


# Run the test cases when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()