diff --git a/onnxmltools/proto/__init__.py b/onnxmltools/proto/__init__.py index ffcf513e..c4a20cc0 100644 --- a/onnxmltools/proto/__init__.py +++ b/onnxmltools/proto/__init__.py @@ -8,7 +8,9 @@ def _check_onnx_version(): import pkg_resources min_required_version = pkg_resources.parse_version('1.0.1') current_version = pkg_resources.get_distribution('onnx').parsed_version - assert current_version >= min_required_version , 'ONNXMLTools requires ONNX version 1.0.1 or a newer one' + assert current_version >= min_required_version, 'ONNXMLTools requires ONNX version 1.0.1 or a newer one' + + _check_onnx_version() # Rather than using ONNX protobuf definition throughout our codebase, we import ONNX protobuf definition here so that @@ -21,6 +23,8 @@ def _check_onnx_version(): from onnx import mapping from onnx.onnx_pb import TensorProto from onnx.helper import split_complex_to_pairs + + def _make_tensor_fixed(name, data_type, dims, vals, raw=False): ''' Make a TensorProto with specified arguments. If raw is False, this @@ -51,4 +55,13 @@ def _make_tensor_fixed(name, data_type, dims, vals, raw=False): def get_opset_number_from_onnx(): - return onnx.defs.onnx_opset_version() + # This function is widely used, but blindly returning the installed ONNX opset number is unsafe, + # so cap it at onnxconverter_common's DEFAULT_OPSET_NUMBER (11 if unavailable) without renaming it. + + default_max_opset = 11 + try: + from onnxconverter_common.topology import DEFAULT_OPSET_NUMBER + default_max_opset = DEFAULT_OPSET_NUMBER + except Exception: # noqa + pass + return min(default_max_opset, onnx.defs.onnx_opset_version()) diff --git a/tests/sciikit-learn/test_sklearn_converters.py b/tests/sciikit-learn/test_sklearn_converters.py deleted file mode 100644 index da6f6b0d..00000000 --- a/tests/sciikit-learn/test_sklearn_converters.py +++ /dev/null @@ -1,133 +0,0 @@ -""" -Tests scilit-learn's tree-based methods' converters. 
-""" -import sys -import unittest -import numpy as np -from sklearn.datasets import load_diabetes, load_iris, make_classification -from sklearn.model_selection import train_test_split -from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier -from onnxmltools.convert import convert_sklearn -from onnxmltools.convert.common.data_types import FloatTensorType -from onnxmltools.utils import dump_data_and_model - - -def _fit_classification_model(model, n_classes, is_str=False): - x, y = make_classification(n_classes=n_classes, n_features=100, - n_samples=1000, - random_state=42, n_informative=7) - y = y.astype(np.str) if is_str else y.astype(np.int64) - x_train, x_test, y_train, _ = train_test_split(x, y, test_size=0.5, - random_state=42) - model.fit(x_train, y_train) - return model, x_test.astype(np.float32) - - -class TestScikitLearnModels(unittest.TestCase): - - @unittest.skipIf(sys.version_info[0] == 2, - reason="sklearn converter not tested on python 2") - def test_sklearn_regressor(self): - iris = load_diabetes() - x = iris.data - y = iris.target - x_train, x_test, y_train, _ = train_test_split(x, y, test_size=0.5, - random_state=42) - xgb = RandomForestRegressor() - xgb.fit(x_train, y_train) - conv_model = convert_sklearn( - xgb, initial_types=[('input', FloatTensorType(shape=[None, x_test.shape[1]]))]) - self.assertTrue(conv_model is not None) - dump_data_and_model( - x_test.astype("float32"), - xgb, - conv_model, - basename="SklearnRFRegressor-Dec3", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) - - @unittest.skipIf(sys.version_info[0] == 2, - reason="sklearn converter not tested on python 2") - def test_sklearn_classifier(self): - xgb, x_test = _fit_classification_model(RandomForestClassifier(), 2) - conv_model = convert_sklearn( - xgb, initial_types=[('input', FloatTensorType(shape=[None, x_test.shape[1]]))]) - self.assertTrue(conv_model is not None) - dump_data_and_model( - x_test, - xgb, - 
conv_model, - basename="SklearnRFClassifier", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) - - @unittest.skipIf(sys.version_info[0] == 2, - reason="sklearn converter not tested on python 2") - def test_sklearn_classifier_multi(self): - xgb, x_test = _fit_classification_model(RandomForestClassifier(), 3) - conv_model = convert_sklearn( - xgb, initial_types=[('input', FloatTensorType(shape=[None, x_test.shape[1]]))]) - self.assertTrue(conv_model is not None) - dump_data_and_model( - x_test, - xgb, - conv_model, - basename="SklearnRFClassifierMulti", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) - - @unittest.skipIf(sys.version_info[0] == 2, - reason="sklearn converter not tested on python 2") - def test_sklearn_classifier_multi_str_labels(self): - xgb, x_test = _fit_classification_model( - RandomForestClassifier(n_estimators=4), 5, is_str=True) - conv_model = convert_sklearn( - xgb, initial_types=[('input', FloatTensorType(shape=[None, x_test.shape[1]]))]) - self.assertTrue(conv_model is not None) - dump_data_and_model( - x_test, - xgb, - conv_model, - basename="SklearnRFClassifierMultiStrLabels", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) - - @unittest.skipIf(sys.version_info[0] == 2, - reason="sklearn converter not tested on python 2") - def test_sklearn_classifier_multi_discrete_int_labels(self): - iris = load_iris() - x = iris.data[:, :2] - y = iris.target - y[y == 0] = 10 - y[y == 1] = 20 - y[y == 2] = -30 - x_train, x_test, y_train, _ = train_test_split(x, - y, - test_size=0.5, - random_state=42) - xgb = RandomForestClassifier(n_estimators=3) - xgb.fit(x_train, y_train) - conv_model = convert_sklearn( - xgb, initial_types=[('input', FloatTensorType(shape=[None, x_test.shape[1]]))]) - self.assertTrue(conv_model is not None) - dump_data_and_model( - x_test.astype("float32"), - xgb, - conv_model, - 
basename="SklearnRFClassifierMultiDiscreteIntLabels", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/xgboost/test_xgboost_pipeline.py b/tests/xgboost/test_xgboost_pipeline.py index 21fe5f1c..1cbf4df3 100644 --- a/tests/xgboost/test_xgboost_pipeline.py +++ b/tests/xgboost/test_xgboost_pipeline.py @@ -7,6 +7,7 @@ import numpy as np from numpy.testing import assert_almost_equal import pandas + try: import onnxruntime as rt from xgboost import XGBRegressor, XGBClassifier, train, DMatrix @@ -14,25 +15,33 @@ from sklearn.compose import ColumnTransformer from sklearn.pipeline import Pipeline from sklearn.preprocessing import MinMaxScaler, OneHotEncoder - from onnxmltools.convert import convert_xgboost + from onnxmltools.convert import convert_xgboost, convert_sklearn from onnxmltools.convert.common.data_types import FloatTensorType from onnxmltools.utils import dump_data_and_model from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost as convert_xgb + from onnxmltools.proto import get_opset_number_from_onnx + can_test = True except ImportError: # python 2.7 can_test = False try: - from skl2onnx import update_registered_converter, to_onnx + from skl2onnx import update_registered_converter from skl2onnx.common.shape_calculator import calculate_linear_regressor_output_shapes + - can_test |= True + can_test &= True except ImportError: # sklearn-onnx not recent enough can_test = False +@unittest.skipIf(sys.version_info[:2] <= (3, 5), reason="not available") +@unittest.skipIf(sys.version_info[0] == 2, + reason="xgboost converter not tested on python 2") +@unittest.skipIf(not can_test, + reason="sklearn-onnx not recent enough") class TestXGBoostModelsPipeline(unittest.TestCase): - + def _column_tranformer_fitted_from_df(self, data): def transformer_for_column(column): if column.dtype in ['float64', 'float32']: @@ -48,7 +57,6 @@ def 
transformer_for_column(column): remainder='drop' ).fit(data) - def _convert_dataframe_schema(self, data): def type_for_column(column): if column.dtype in ['float64', 'float32']: @@ -63,40 +71,25 @@ def type_for_column(column): raise ValueError() res = [(col, type_for_column(data[col])) for col in data.columns] - return res + return res - @unittest.skipIf(sys.version_info[:2] <= (3, 5), reason="not available") - @unittest.skipIf(sys.version_info[0] == 2, - reason="xgboost converter not tested on python 2") - @unittest.skipIf(not can_test, - reason="sklearn-onnx not recent enough") def test_xgboost_10_skl_missing(self): self.common_test_xgboost_10_skl(np.nan) - @unittest.skipIf(sys.version_info[:2] <= (3, 5), reason="not available") - @unittest.skipIf(sys.version_info[0] == 2, - reason="xgboost converter not tested on python 2") - @unittest.skipIf(not can_test, - reason="sklearn-onnx not recent enough") def test_xgboost_10_skl_zero(self): try: self.common_test_xgboost_10_skl(0., True) except RuntimeError as e: assert "Cannot convert a XGBoost model where missing values" in str(e) - @unittest.skipIf(sys.version_info[:2] <= (3, 5), reason="not available") - @unittest.skipIf(sys.version_info[0] == 2, - reason="xgboost converter not tested on python 2") - @unittest.skipIf(not can_test, - reason="sklearn-onnx not recent enough") def test_xgboost_10_skl_zero_replace(self): self.common_test_xgboost_10_skl(np.nan, True) - + def common_test_xgboost_10_skl(self, missing, replace=False): this = os.path.abspath(os.path.dirname(__file__)) data = os.path.join(this, "data_fail.csv") data = pandas.read_csv(data) - + for col in data: dtype = data[col].dtype if dtype in ['float64', 'float32']: @@ -112,9 +105,9 @@ def common_test_xgboost_10_skl(self, missing, replace=False): train_df, test_df, train_labels, test_labels = train_test_split( full_df, full_labels, test_size=.2, random_state=11) - + col_transformer = self._column_tranformer_fitted_from_df(full_df) - + param_distributions = 
{ "colsample_bytree": 0.5, "gamma": 0.2, @@ -130,7 +123,7 @@ def common_test_xgboost_10_skl(self, missing, replace=False): regressor.fit(col_transformer.transform(train_df), train_labels) model = Pipeline(steps=[('preprocessor', col_transformer), ('regressor', regressor)]) - + update_registered_converter( XGBRegressor, 'XGBRegressor', calculate_linear_regressor_output_shapes, @@ -140,7 +133,9 @@ def common_test_xgboost_10_skl(self, missing, replace=False): input_xgb = model.steps[0][-1].transform(test_df[:5]).astype(np.float32) if replace: input_xgb[input_xgb[:, :] == missing] = np.nan - onnx_last = to_onnx(model.steps[1][-1], input_xgb) + onnx_last = convert_sklearn(model.steps[1][-1], + initial_types=[('X', FloatTensorType(shape=[None, input_xgb.shape[1]]))], + target_opset=get_opset_number_from_onnx()) session = rt.InferenceSession(onnx_last.SerializeToString()) pred_skl = model.steps[1][-1].predict(input_xgb).ravel() pred_onx = session.run(None, {'X': input_xgb})[0].ravel()