Skip to content

Commit

Permalink
Fix for onnx 1.7 release (#381)
Browse files Browse the repository at this point in the history
  • Loading branch information
wenbingl authored Apr 3, 2020
1 parent 2cc981a commit 6546438
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 161 deletions.
17 changes: 15 additions & 2 deletions onnxmltools/proto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ def _check_onnx_version():
import pkg_resources
min_required_version = pkg_resources.parse_version('1.0.1')
current_version = pkg_resources.get_distribution('onnx').parsed_version
assert current_version >= min_required_version , 'ONNXMLTools requires ONNX version 1.0.1 or a newer one'
assert current_version >= min_required_version, 'ONNXMLTools requires ONNX version 1.0.1 or a newer one'


_check_onnx_version()

# Rather than using ONNX protobuf definition throughout our codebase, we import ONNX protobuf definition here so that
Expand All @@ -21,6 +23,8 @@ def _check_onnx_version():
from onnx import mapping
from onnx.onnx_pb import TensorProto
from onnx.helper import split_complex_to_pairs


def _make_tensor_fixed(name, data_type, dims, vals, raw=False):
'''
Make a TensorProto with specified arguments. If raw is False, this
Expand Down Expand Up @@ -51,4 +55,13 @@ def _make_tensor_fixed(name, data_type, dims, vals, raw=False):


def get_opset_number_from_onnx():
    '''
    Return the ONNX opset number that the converters should target.

    The value is the opset reported by the installed ``onnx`` package,
    capped at a default maximum: ``DEFAULT_OPSET_NUMBER`` from
    ``onnxconverter_common.topology`` when that import succeeds,
    otherwise 11.

    This function is widely used, so its name is kept unchanged even
    though it no longer returns the raw ONNX opset number.

    :return: min(default maximum opset, onnx.defs.onnx_opset_version())
    '''
    # Taking the opset number from onnx blindly is unsafe, so the value is
    # capped by the default maximum opset below.
    default_max_opset = 11
    try:
        from onnxconverter_common.topology import DEFAULT_OPSET_NUMBER
    except Exception:
        # Best effort: if onnxconverter_common is missing or cannot be
        # imported for any reason, keep the built-in cap.
        pass
    else:
        default_max_opset = DEFAULT_OPSET_NUMBER
    return min(default_max_opset, onnx.defs.onnx_opset_version())
133 changes: 0 additions & 133 deletions tests/sciikit-learn/test_sklearn_converters.py

This file was deleted.

47 changes: 21 additions & 26 deletions tests/xgboost/test_xgboost_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,41 @@
import numpy as np
from numpy.testing import assert_almost_equal
import pandas

try:
import onnxruntime as rt
from xgboost import XGBRegressor, XGBClassifier, train, DMatrix
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from onnxmltools.convert import convert_xgboost
from onnxmltools.convert import convert_xgboost, convert_sklearn
from onnxmltools.convert.common.data_types import FloatTensorType
from onnxmltools.utils import dump_data_and_model
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost as convert_xgb
from onnxmltools.proto import get_opset_number_from_onnx

can_test = True
except ImportError:
# python 2.7
can_test = False
try:
from skl2onnx import update_registered_converter, to_onnx
from skl2onnx import update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_regressor_output_shapes

can_test |= True
except ImportError:
# sklearn-onnx not recent enough
can_test = False


@unittest.skipIf(sys.version_info[:2] <= (3, 5), reason="not available")
@unittest.skipIf(sys.version_info[0] == 2,
reason="xgboost converter not tested on python 2")
@unittest.skipIf(not can_test,
reason="sklearn-onnx not recent enough")
class TestXGBoostModelsPipeline(unittest.TestCase):

def _column_tranformer_fitted_from_df(self, data):
def transformer_for_column(column):
if column.dtype in ['float64', 'float32']:
Expand All @@ -48,7 +57,6 @@ def transformer_for_column(column):
remainder='drop'
).fit(data)


def _convert_dataframe_schema(self, data):
def type_for_column(column):
if column.dtype in ['float64', 'float32']:
Expand All @@ -63,40 +71,25 @@ def type_for_column(column):
raise ValueError()

res = [(col, type_for_column(data[col])) for col in data.columns]
return res
return res

@unittest.skipIf(sys.version_info[:2] <= (3, 5), reason="not available")
@unittest.skipIf(sys.version_info[0] == 2,
reason="xgboost converter not tested on python 2")
@unittest.skipIf(not can_test,
reason="sklearn-onnx not recent enough")
def test_xgboost_10_skl_missing(self):
self.common_test_xgboost_10_skl(np.nan)

@unittest.skipIf(sys.version_info[:2] <= (3, 5), reason="not available")
@unittest.skipIf(sys.version_info[0] == 2,
reason="xgboost converter not tested on python 2")
@unittest.skipIf(not can_test,
reason="sklearn-onnx not recent enough")
def test_xgboost_10_skl_zero(self):
try:
self.common_test_xgboost_10_skl(0., True)
except RuntimeError as e:
assert "Cannot convert a XGBoost model where missing values" in str(e)

@unittest.skipIf(sys.version_info[:2] <= (3, 5), reason="not available")
@unittest.skipIf(sys.version_info[0] == 2,
reason="xgboost converter not tested on python 2")
@unittest.skipIf(not can_test,
reason="sklearn-onnx not recent enough")
def test_xgboost_10_skl_zero_replace(self):
self.common_test_xgboost_10_skl(np.nan, True)

def common_test_xgboost_10_skl(self, missing, replace=False):
this = os.path.abspath(os.path.dirname(__file__))
data = os.path.join(this, "data_fail.csv")
data = pandas.read_csv(data)

for col in data:
dtype = data[col].dtype
if dtype in ['float64', 'float32']:
Expand All @@ -112,9 +105,9 @@ def common_test_xgboost_10_skl(self, missing, replace=False):

train_df, test_df, train_labels, test_labels = train_test_split(
full_df, full_labels, test_size=.2, random_state=11)

col_transformer = self._column_tranformer_fitted_from_df(full_df)

param_distributions = {
"colsample_bytree": 0.5,
"gamma": 0.2,
Expand All @@ -130,7 +123,7 @@ def common_test_xgboost_10_skl(self, missing, replace=False):
regressor.fit(col_transformer.transform(train_df), train_labels)
model = Pipeline(steps=[('preprocessor', col_transformer),
('regressor', regressor)])

update_registered_converter(
XGBRegressor, 'XGBRegressor',
calculate_linear_regressor_output_shapes,
Expand All @@ -140,7 +133,9 @@ def common_test_xgboost_10_skl(self, missing, replace=False):
input_xgb = model.steps[0][-1].transform(test_df[:5]).astype(np.float32)
if replace:
input_xgb[input_xgb[:, :] == missing] = np.nan
onnx_last = to_onnx(model.steps[1][-1], input_xgb)
onnx_last = convert_sklearn(model.steps[1][-1],
initial_types=[('X', FloatTensorType(shape=[None, input_xgb.shape[1]]))],
target_opset=get_opset_number_from_onnx())
session = rt.InferenceSession(onnx_last.SerializeToString())
pred_skl = model.steps[1][-1].predict(input_xgb).ravel()
pred_onx = session.run(None, {'X': input_xgb})[0].ravel()
Expand Down

0 comments on commit 6546438

Please sign in to comment.