diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml index 40d9ff887..c99ae8bb4 100644 --- a/.azure-pipelines/linux-conda-CI.yml +++ b/.azure-pipelines/linux-conda-CI.yml @@ -200,6 +200,12 @@ jobs: environmentName: 'py$(python.version)' packageSpecs: 'python=$(python.version)' + - script: | + sudo apt-get install -y language-pack-en + sudo locale-gen en_US.UTF-8 + sudo update-locale LANG=en_US.UTF-8 + displayName: 'Install language packs' + - script: | test '$(python.version)' == '3.7' && apt-get install protobuf-compiler libprotoc-dev conda config --set always_yes yes --set changeps1 no diff --git a/.gitignore b/.gitignore index 377561ae0..4a5128a88 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,4 @@ docs/tutorial/*.onnx docs/tutorial/*.jpg docs/tutorial/*.png docs/tutorial/*.dot +docs/tutorial/catboost_info diff --git a/CHANGELOGS.md b/CHANGELOGS.md new file mode 100644 index 000000000..929eda009 --- /dev/null +++ b/CHANGELOGS.md @@ -0,0 +1,8 @@ +Change Logs +=========== + +1.16.0 +++++++ + +* add option 'locale' to converters of CountVectorizer, TfidfVectorizer + [#1020](https://github.com/onnx/sklearn-onnx/pull/1020) diff --git a/README.md b/README.md index 6f79fd921..6ac428186 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@

-[![Build Status Linux](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status%2Fonnx.sklearn-onnx.linux.CI?branchName=refs%2Fpull%2F1009%2Fmerge)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=21&branchName=refs%2Fpull%2F1009%2Fmerge) +[![Build Status](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status%2Fonnx.sklearn-onnx.linux.CI?branchName=refs%2Fpull%2F1020%2Fmerge)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=21&branchName=refs%2Fpull%2F1020%2Fmerge) -[![Build Status Windows](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status%2Fonnx.sklearn-onnx.win.CI?branchName=refs%2Fpull%2F1009%2Fmerge)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=22&branchName=refs%2Fpull%2F1009%2Fmerge) +[![Build Status](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status%2Fonnx.sklearn-onnx.win.CI?branchName=refs%2Fpull%2F1020%2Fmerge)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=22&branchName=refs%2Fpull%2F1020%2Fmerge) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) @@ -33,6 +33,38 @@ Or you can install from the source with the latest changes. pip install git+https://github.com/onnx/sklearn-onnx.git ``` +## Getting started + +```python +# Train a model. +import numpy as np +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from sklearn.ensemble import RandomForestClassifier + +iris = load_iris() +X, y = iris.data, iris.target +X = X.astype(np.float32) +X_train, X_test, y_train, y_test = train_test_split(X, y) +clr = RandomForestClassifier() +clr.fit(X_train, y_train) + +# Convert into ONNX format. +from skl2onnx import to_onnx + +onx = to_onnx(clr, X[:1]) +with open("rf_iris.onnx", "wb") as f: + f.write(onx.SerializeToString()) + +# Compute the prediction with onnxruntime. +import onnxruntime as rt + +sess = rt.InferenceSession("rf_iris.onnx", providers=["CPUExecutionProvider"]) +input_name = sess.get_inputs()[0].name +label_name = sess.get_outputs()[0].name +pred_onx = sess.run([label_name], {input_name: X_test.astype(np.float32)})[0] +``` + ## Contribute We welcome contributions in the form of feedback, ideas, or code. diff --git a/docs/api_summary.rst b/docs/api_summary.rst index 16041defc..a8c713518 100644 --- a/docs/api_summary.rst +++ b/docs/api_summary.rst @@ -45,7 +45,6 @@ it is possible to enable logging: import logging logger = logging.getLogger('skl2onnx') logger.setLevel(logging.DEBUG) - logging.basicConfig(level=logging.DEBUG) Example :ref:`l-example-logging` illustrates what it looks like. 
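The Getting started snippet added to the README above stops at running the converted model with onnxruntime. A minimal sanity check, shown here only as a sketch outside the diff and reusing the `clr`, `X_test`, and `pred_onx` names defined in that snippet, is to compare the onnxruntime labels with scikit-learn's own predictions:

```python
# Sketch only: assumes clr, X_test and pred_onx from the README example above.
pred_skl = clr.predict(X_test)

# The two label vectors are expected to agree; any rare mismatch would come
# from float32 rounding inside the exported trees.
print("agreement:", (pred_onx == pred_skl).mean())
```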
diff --git a/docs/conf.py b/docs/conf.py index a730d3b02..bda240463 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -5,6 +5,7 @@ import os import sys +import logging import warnings import skl2onnx @@ -72,16 +73,14 @@ linkcode_resolve = make_linkcode_resolve( "skl2onnx", - "https://github.com/onnx/skl2onnx/blob/{revision}/" "{package}/{path}#L{lineno}", + "https://github.com/onnx/skl2onnx/blob/{revision}/{package}/{path}#L{lineno}", ) intersphinx_mapping = { "joblib": ("https://joblib.readthedocs.io/en/latest/", None), "python": ("https://docs.python.org/{.major}".format(sys.version_info), None), "matplotlib": ("https://matplotlib.org/", None), - "mlinsights": ("http://www.xavierdupre.fr/app/mlinsights/helpsphinx/", None), "numpy": ("https://docs.scipy.org/doc/numpy/", None), - "pyquickhelper": ("http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/", None), "onnxruntime": ("https://onnxruntime.ai/docs/api/python/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), "scipy": ("https://docs.scipy.org/doc/scipy/reference", None), @@ -144,4 +143,14 @@ def setup(app): # Placeholder to initialize the folder before # generating the documentation. + logger = logging.getLogger("skl2onnx") + logger.setLevel(logging.WARNING) + logger = logging.getLogger("matplotlib.font_manager") + logger.setLevel(logging.WARNING) + logger = logging.getLogger("matplotlib.ticker") + logger.setLevel(logging.WARNING) + logger = logging.getLogger("PIL.PngImagePlugin") + logger.setLevel(logging.WARNING) + logger = logging.getLogger("graphviz._tools") + logger.setLevel(logging.WARNING) return app diff --git a/docs/examples/plot_convert_model.py b/docs/examples/plot_convert_model.py index af00277ef..c4bb1e7c6 100644 --- a/docs/examples/plot_convert_model.py +++ b/docs/examples/plot_convert_model.py @@ -69,7 +69,7 @@ with open("logreg_iris.onnx", "wb") as f: f.write(onx.SerializeToString()) -sess = rt.InferenceSession("logreg_iris.onnx") +sess = rt.InferenceSession("logreg_iris.onnx", providers=["CPUExecutionProvider"]) input_name = sess.get_inputs()[0].name label_name = sess.get_outputs()[0].name pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0] diff --git a/docs/examples/plot_convert_syntax.py b/docs/examples/plot_convert_syntax.py index bdff42218..d6f16ddbe 100644 --- a/docs/examples/plot_convert_syntax.py +++ b/docs/examples/plot_convert_syntax.py @@ -31,7 +31,7 @@ def predict_with_onnxruntime(onx, X): - sess = InferenceSession(onx.SerializeToString()) + sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) input_name = sess.get_inputs()[0].name res = sess.run(None, {input_name: X.astype(np.float32)}) return res[0] diff --git a/docs/examples/plot_convert_zipmap.py b/docs/examples/plot_convert_zipmap.py index fbe48207b..e2a1f5385 100644 --- a/docs/examples/plot_convert_zipmap.py +++ b/docs/examples/plot_convert_zipmap.py @@ -48,7 +48,7 @@ # Let's confirm the output type of the probabilities # is a list of dictionaries with onnxruntime. 
-sess = rt.InferenceSession(onx.SerializeToString()) +sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) res = sess.run(None, {"float_input": X_test.astype(numpy.float32)}) print(res[1][:2]) print("probabilities type:", type(res[1])) @@ -66,7 +66,9 @@ clr, initial_types=initial_type, options=options, target_opset=12 ) -sess2 = rt.InferenceSession(onx2.SerializeToString()) +sess2 = rt.InferenceSession( + onx2.SerializeToString(), providers=["CPUExecutionProvider"] +) res2 = sess2.run(None, {"float_input": X_test.astype(numpy.float32)}) print(res2[1][:2]) print("probabilities type:", type(res2[1])) @@ -85,7 +87,9 @@ clr, initial_types=initial_type, options=options, target_opset=12 ) -sess3 = rt.InferenceSession(onx3.SerializeToString()) +sess3 = rt.InferenceSession( + onx3.SerializeToString(), providers=["CPUExecutionProvider"] +) res3 = sess3.run(None, {"float_input": X_test.astype(numpy.float32)}) for i, out in enumerate(sess3.get_outputs()): print( diff --git a/docs/examples/plot_custom_model.py b/docs/examples/plot_custom_model.py index ac14d66c3..5a35811c0 100644 --- a/docs/examples/plot_custom_model.py +++ b/docs/examples/plot_custom_model.py @@ -410,7 +410,7 @@ def predictable_tsne_converter(scope, operator, container): ########################## # Predictions with onnxruntime. -sess = rt.InferenceSession("predictable_tsne.onnx") +sess = rt.InferenceSession("predictable_tsne.onnx", providers=["CPUExecutionProvider"]) pred_onx = sess.run(None, {"input": X_test[:1].astype(numpy.float32)}) print("transform", pred_onx[0]) diff --git a/docs/examples/plot_custom_parser.py b/docs/examples/plot_custom_parser.py index ecbe99f95..64102ab79 100644 --- a/docs/examples/plot_custom_parser.py +++ b/docs/examples/plot_custom_parser.py @@ -259,7 +259,9 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None): X32 = X_test[:5].astype(np.float32) -sess = rt.InferenceSession(model_onnx.SerializeToString()) +sess = rt.InferenceSession( + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] +) results = sess.run(None, {"X": X32}) print("--labels--") diff --git a/docs/examples/plot_custom_parser_alternative.py b/docs/examples/plot_custom_parser_alternative.py index 48b41a0b1..9a28f5103 100644 --- a/docs/examples/plot_custom_parser_alternative.py +++ b/docs/examples/plot_custom_parser_alternative.py @@ -236,7 +236,9 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None): X32 = X_test[:5].astype(np.float32) -sess = rt.InferenceSession(model_onnx.SerializeToString()) +sess = rt.InferenceSession( + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] +) results = sess.run(None, {"X": X32}) print("--labels--") diff --git a/docs/examples/plot_errors_onnxruntime.py b/docs/examples/plot_errors_onnxruntime.py index 5e92fdd30..0d364d5d9 100644 --- a/docs/examples/plot_errors_onnxruntime.py +++ b/docs/examples/plot_errors_onnxruntime.py @@ -41,7 +41,7 @@ ) example2 = "logreg_iris.onnx" -sess = rt.InferenceSession(example2) +sess = rt.InferenceSession(example2, providers=["CPUExecutionProvider"]) input_name = sess.get_inputs()[0].name output_name = sess.get_outputs()[0].name diff --git a/docs/examples/plot_gpr.py b/docs/examples/plot_gpr.py index b38412ecd..c97064d21 100644 --- a/docs/examples/plot_gpr.py +++ b/docs/examples/plot_gpr.py @@ -51,7 +51,7 @@ initial_type = [("X", FloatTensorType([None, X_train.shape[1]]))] onx = convert_sklearn(gpr, initial_types=initial_type, target_opset=12) -sess = 
rt.InferenceSession(onx.SerializeToString()) +sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) try: pred_onx = sess.run(None, {"X": X_test.astype(numpy.float32)})[0] except RuntimeError as e: @@ -74,7 +74,7 @@ initial_type = [("X", FloatTensorType([None, None]))] onx = convert_sklearn(gpr, initial_types=initial_type, target_opset=12) -sess = rt.InferenceSession(onx.SerializeToString()) +sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) pred_onx = sess.run(None, {"X": X_test.astype(numpy.float32)})[0] pred_skl = gpr.predict(X_test) @@ -111,7 +111,9 @@ initial_type = [("X", DoubleTensorType([None, None]))] onx64 = convert_sklearn(gpr, initial_types=initial_type, target_opset=12) -sess64 = rt.InferenceSession(onx64.SerializeToString()) +sess64 = rt.InferenceSession( + onx64.SerializeToString(), providers=["CPUExecutionProvider"] +) pred_onx64 = sess64.run(None, {"X": X_test})[0] print(pred_onx64[0, :10]) @@ -169,7 +171,9 @@ gpr, initial_types=initial_type, options=options, target_opset=12 ) -sess64_std = rt.InferenceSession(onx64_std.SerializeToString()) +sess64_std = rt.InferenceSession( + onx64_std.SerializeToString(), providers=["CPUExecutionProvider"] +) pred_onx64_std = sess64_std.run(None, {"X": X_test[:5]}) pprint.pprint(pred_onx64_std) diff --git a/docs/examples/plot_intermediate_outputs.py b/docs/examples/plot_intermediate_outputs.py index 98937ce83..5c3b6493f 100644 --- a/docs/examples/plot_intermediate_outputs.py +++ b/docs/examples/plot_intermediate_outputs.py @@ -195,7 +195,7 @@ def convert_dataframe_schema(df, drop=None): ################################ # We are ready to run *onnxruntime*. -sess = rt.InferenceSession("pipeline_titanic.onnx") +sess = rt.InferenceSession("pipeline_titanic.onnx", providers=["CPUExecutionProvider"]) pred_onx = sess.run(None, inputs) print("predict", pred_onx[0][:5]) print("predict_proba", pred_onx[1][:1]) @@ -228,7 +228,9 @@ def convert_dataframe_schema(df, drop=None): ################################ # Let's compute the numerical features. 
-sess = rt.InferenceSession("pipeline_titanic_numerical.onnx") +sess = rt.InferenceSession( + "pipeline_titanic_numerical.onnx", providers=["CPUExecutionProvider"] +) numX = sess.run(None, inputs) print("numerical features", numX[0][:1]) @@ -238,7 +240,9 @@ def convert_dataframe_schema(df, drop=None): print(model_onnx) text_onnx = select_model_inputs_outputs(model_onnx, "variable2") save_onnx_model(text_onnx, "pipeline_titanic_textual.onnx") -sess = rt.InferenceSession("pipeline_titanic_textual.onnx") +sess = rt.InferenceSession( + "pipeline_titanic_textual.onnx", providers=["CPUExecutionProvider"] +) numT = sess.run(None, inputs) print("textual features", numT[0][:1]) diff --git a/docs/examples/plot_investigate_pipeline.py b/docs/examples/plot_investigate_pipeline.py index b131d4fb8..2f532d139 100644 --- a/docs/examples/plot_investigate_pipeline.py +++ b/docs/examples/plot_investigate_pipeline.py @@ -55,7 +55,9 @@ initial_types = [("input", FloatTensorType((None, X_digits.shape[1])))] model_onnx = convert_sklearn(pipe, initial_types=initial_types, target_opset=12) -sess = rt.InferenceSession(model_onnx.SerializeToString()) +sess = rt.InferenceSession( + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] +) print("skl predict_proba") print(pipe.predict_proba(X_digits[:2])) onx_pred = sess.run(None, {"input": X_digits[:2].astype(np.float32)})[1] @@ -82,7 +84,9 @@ for i, step in enumerate(steps): onnx_step = step["onnx_step"] - sess = rt.InferenceSession(onnx_step.SerializeToString()) + sess = rt.InferenceSession( + onnx_step.SerializeToString(), providers=["CPUExecutionProvider"] + ) onnx_outputs = sess.run(None, {"input": X_digits[:2].astype(np.float32)}) skl_outputs = step["model"]._debug.outputs print("step 1", type(step["model"])) diff --git a/docs/examples/plot_logging.py b/docs/examples/plot_logging.py index 724d35657..39fd7ac03 100644 --- a/docs/examples/plot_logging.py +++ b/docs/examples/plot_logging.py @@ -46,7 +46,7 @@ onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12) -sess = rt.InferenceSession(onx.SerializeToString()) +sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) input_name = sess.get_inputs()[0].name label_name = sess.get_outputs()[0].name pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0] @@ -74,7 +74,6 @@ logger = logging.getLogger("skl2onnx") logger.setLevel(logging.DEBUG) -logging.basicConfig(level=logging.DEBUG) convert_sklearn(clr, initial_types=initial_type, target_opset=12) @@ -82,10 +81,10 @@ # And to disable it. 
logger.setLevel(logging.INFO) -logging.basicConfig(level=logging.INFO) convert_sklearn(clr, initial_types=initial_type, target_opset=12) +logger.setLevel(logging.WARNING) ################################# # **Versions used for this example** diff --git a/docs/examples/plot_metadata.py b/docs/examples/plot_metadata.py index 3b3631164..fc0bde3c6 100644 --- a/docs/examples/plot_metadata.py +++ b/docs/examples/plot_metadata.py @@ -39,7 +39,7 @@ ############################# # With *ONNX Runtime*: -sess = InferenceSession(example) +sess = InferenceSession(example, providers=["CPUExecutionProvider"]) meta = sess.get_modelmeta() print("custom_metadata_map={}".format(meta.custom_metadata_map)) diff --git a/docs/examples/plot_nmf.py b/docs/examples/plot_nmf.py index 4b9be8605..89e8acb02 100644 --- a/docs/examples/plot_nmf.py +++ b/docs/examples/plot_nmf.py @@ -113,7 +113,9 @@ def nmf_to_onnx(W, H, op_version=12): ######################################## # Let's compute prediction with it. -sess = InferenceSession(model_onnx.SerializeToString()) +sess = InferenceSession( + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] +) def predict_onnx(sess, row_indices, col_indices): diff --git a/docs/examples/plot_onnx_operators.py b/docs/examples/plot_onnx_operators.py index 9a72fd884..cd7d7af1b 100644 --- a/docs/examples/plot_onnx_operators.py +++ b/docs/examples/plot_onnx_operators.py @@ -153,7 +153,9 @@ def predict_with_onnxruntime(model_def, *inputs): import onnxruntime as ort - sess = ort.InferenceSession(model_def.SerializeToString()) + sess = ort.InferenceSession( + model_def.SerializeToString(), providers=["CPUExecutionProvider"] + ) names = [i.name for i in sess.get_inputs()] dinputs = {name: input for name, input in zip(names, inputs)} res = sess.run(None, dinputs) diff --git a/docs/examples/plot_pipeline_lightgbm.py b/docs/examples/plot_pipeline_lightgbm.py index fb584e5a1..fe89480a6 100644 --- a/docs/examples/plot_pipeline_lightgbm.py +++ b/docs/examples/plot_pipeline_lightgbm.py @@ -112,7 +112,9 @@ # Predictions with onnxruntime. try: - sess = rt.InferenceSession("pipeline_lightgbm.onnx") + sess = rt.InferenceSession( + "pipeline_lightgbm.onnx", providers=["CPUExecutionProvider"] + ) except OrtFail as e: print(e) print("The converter requires onnxmltools>=1.7.0") diff --git a/docs/examples/plot_pipeline_xgboost.py b/docs/examples/plot_pipeline_xgboost.py index 3feaa6d87..69e4ef4db 100644 --- a/docs/examples/plot_pipeline_xgboost.py +++ b/docs/examples/plot_pipeline_xgboost.py @@ -128,7 +128,7 @@ ########################## # Predictions with onnxruntime. -sess = rt.InferenceSession("pipeline_xgboost.onnx") +sess = rt.InferenceSession("pipeline_xgboost.onnx", providers=["CPUExecutionProvider"]) pred_onx = sess.run(None, {"input": X[:5].astype(numpy.float32)}) print("predict", pred_onx[0]) print("predict_proba", pred_onx[1][:1]) diff --git a/docs/examples/plot_tfidfvectorizer.py b/docs/examples/plot_tfidfvectorizer.py index 290eb5e36..a4e849f7d 100644 --- a/docs/examples/plot_tfidfvectorizer.py +++ b/docs/examples/plot_tfidfvectorizer.py @@ -197,7 +197,7 @@ def transform(self, posts): ########################## # Predictions with onnxruntime. 
-sess = rt.InferenceSession("pipeline_tfidf.onnx") +sess = rt.InferenceSession("pipeline_tfidf.onnx", providers=["CPUExecutionProvider"]) print("---", train_data[0]) inputs = {"input": train_data[:1]} pred_onx = sess.run(None, inputs) diff --git a/docs/index.rst b/docs/index.rst index 02ba5ade9..bfff99e39 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -69,33 +69,36 @@ to automatically check every converter with `onnxruntime-gpu `_. Every converter is tested with this backend. +**Getting started** + :: - # Train a model. + import numpy as np from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestClassifier + iris = load_iris() X, y = iris.data, iris.target + X = X.astype(np.float32) X_train, X_test, y_train, y_test = train_test_split(X, y) clr = RandomForestClassifier() clr.fit(X_train, y_train) - # Convert into ONNX format - from skl2onnx import convert_sklearn - from skl2onnx.common.data_types import FloatTensorType - initial_type = [('float_input', FloatTensorType([None, 4]))] - onx = convert_sklearn(clr, initial_types=initial_type) + # Convert into ONNX format. + from skl2onnx import to_onnx + + onx = to_onnx(clr, X[:1]) with open("rf_iris.onnx", "wb") as f: f.write(onx.SerializeToString()) - # Compute the prediction with ONNX Runtime + # Compute the prediction with onnxruntime. import onnxruntime as rt - import numpy + sess = rt.InferenceSession("rf_iris.onnx", providers=["CPUExecutionProvider"]) input_name = sess.get_inputs()[0].name label_name = sess.get_outputs()[0].name - pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0] + pred_onx = sess.run([label_name], {input_name: X_test.astype(np.float32)})[0] **Related converters** diff --git a/docs/index_tutorial.rst b/docs/index_tutorial.rst index 524ad0481..cb46f834b 100644 --- a/docs/index_tutorial.rst +++ b/docs/index_tutorial.rst @@ -25,6 +25,7 @@ The tutorial was tested with following version: .. runpython:: :showcode: + import catboost import numpy import scipy import sklearn @@ -34,11 +35,10 @@ The tutorial was tested with following version: import onnxruntime import xgboost import skl2onnx - import pyquickhelper - mods = [numpy, scipy, sklearn, lightgbm, xgboost, + mods = [numpy, scipy, sklearn, lightgbm, xgboost, catboost, onnx, onnxmltools, onnxruntime, - skl2onnx, pyquickhelper] + skl2onnx] mods = [(m.__name__, m.__version__) for m in mods] mx = max(len(_[0]) for _ in mods) + 1 for name, vers in sorted(mods): diff --git a/docs/tutorial/plot_abegin_convert_pipeline.py b/docs/tutorial/plot_abegin_convert_pipeline.py index 9596fe305..fae79bc22 100644 --- a/docs/tutorial/plot_abegin_convert_pipeline.py +++ b/docs/tutorial/plot_abegin_convert_pipeline.py @@ -65,7 +65,7 @@ # # The first example uses :epkg:`onnxruntime`. -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) pred_ort = sess.run(None, {"X": X_test.astype(numpy.float32)})[0] pred_skl = ereg.predict(X_test.astype(numpy.float32)) diff --git a/docs/tutorial/plot_bbegin_measure_time.py b/docs/tutorial/plot_bbegin_measure_time.py index 823211506..3b6b83382 100644 --- a/docs/tutorial/plot_bbegin_measure_time.py +++ b/docs/tutorial/plot_bbegin_measure_time.py @@ -55,7 +55,7 @@ # We measure the processing time per observation whether # or not an observation belongs to a batch or is a single one. 
-sizes = [(1, 50), (10, 50), (1000, 10), (10000, 5)] +sizes = [(1, 50), (10, 50), (100, 10)] with config_context(assume_finite=True): obs = [] diff --git a/docs/tutorial/plot_catwoe_transformer.py b/docs/tutorial/plot_catwoe_transformer.py index bf9bed2fe..aca734bd0 100644 --- a/docs/tutorial/plot_catwoe_transformer.py +++ b/docs/tutorial/plot_catwoe_transformer.py @@ -131,7 +131,7 @@ def ordinal_encoder_converter(scope, operator, container): ord_onx = to_onnx(enc, X[:1], target_opset=14) -sess = InferenceSession(ord_onx.SerializeToString()) +sess = InferenceSession(ord_onx.SerializeToString(), providers=["CPUExecutionProvider"]) print(sess.run(None, {"X": X[:5]})[0]) ###################################### @@ -232,5 +232,5 @@ def woe_encoder_converter(scope, operator, container): woe_onx = to_onnx(woe, X[:1], target_opset=14) -sess = InferenceSession(woe_onx.SerializeToString()) +sess = InferenceSession(woe_onx.SerializeToString(), providers=["CPUExecutionProvider"]) print(sess.run(None, {"X": X[:5]})[0]) diff --git a/docs/tutorial/plot_dbegin_options.py b/docs/tutorial/plot_dbegin_options.py index 47b813f3a..e21a5df11 100644 --- a/docs/tutorial/plot_dbegin_options.py +++ b/docs/tutorial/plot_dbegin_options.py @@ -34,17 +34,18 @@ iris = load_iris() X, y = iris.data, iris.target X_train, _, y_train, __ = train_test_split(X, y, random_state=11) - clr = LogisticRegression() + clr = LogisticRegression(max_iter=1000) clr.fit(X_train, y_train) model_def = to_onnx(clr, X_train.astype(numpy.float32)) - print(printable_graph(model_def)) + print(printable_graph(model_def.graph)) This operator is not really efficient as it copies every probabilies and labels in a different container. This time is usually significant for small classifiers. Then it makes sense to remove it. .. runpython:: + import numpy from onnx.helper import printable_graph from sklearn.datasets import load_iris @@ -55,12 +56,12 @@ iris = load_iris() X, y = iris.data, iris.target X_train, _, y_train, __ = train_test_split(X, y, random_state=11) - clr = LogisticRegression() + clr = LogisticRegression(max_iter=1000) clr.fit(X_train, y_train) model_def = to_onnx(clr, X_train.astype(numpy.float32), options={LogisticRegression: {'zipmap': False}}) - print(printable_graph(model_def)) + print(printable_graph(model_def.graph)) There might be in the graph many classifiers, it is important to have a way to specify which classifier should keep its *ZipMap* @@ -199,7 +200,9 @@ X_train.astype(numpy.float32), options={id(clrrf): {"decision_path": True, "zipmap": False}}, ) -sess = InferenceSession(model_def.SerializeToString()) +sess = InferenceSession( + model_def.SerializeToString(), providers=["CPUExecutionProvider"] +) ########################################## # The model produces 3 outputs. diff --git a/docs/tutorial/plot_dbegin_options_zipmap.py b/docs/tutorial/plot_dbegin_options_zipmap.py index d7eae7be8..2b71569c1 100644 --- a/docs/tutorial/plot_dbegin_options_zipmap.py +++ b/docs/tutorial/plot_dbegin_options_zipmap.py @@ -48,7 +48,7 @@ # The output type for the probabilities is a list of # dictionaries. 
-sess = rt.InferenceSession(onx.SerializeToString()) +sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) res = sess.run(None, {"X": X_test}) print(res[1][:2]) print("probabilities type:", type(res[1])) @@ -64,7 +64,9 @@ options = {id(clr): {"zipmap": False}} onx2 = to_onnx(clr, X_train, options=options, target_opset=12) -sess2 = rt.InferenceSession(onx2.SerializeToString()) +sess2 = rt.InferenceSession( + onx2.SerializeToString(), providers=["CPUExecutionProvider"] +) res2 = sess2.run(None, {"X": X_test}) print(res2[1][:2]) print("probabilities type:", type(res2[1])) @@ -81,7 +83,9 @@ options = {id(clr): {"zipmap": "columns"}} onx3 = to_onnx(clr, X_train, options=options, target_opset=12) -sess3 = rt.InferenceSession(onx3.SerializeToString()) +sess3 = rt.InferenceSession( + onx3.SerializeToString(), providers=["CPUExecutionProvider"] +) res3 = sess3.run(None, {"X": X_test}) for i, out in enumerate(sess3.get_outputs()): print( @@ -124,7 +128,9 @@ options = {id(clr): {"zipmap": False, "output_class_labels": True}} onx4 = to_onnx(clr, X_train, options=options, target_opset=12) -sess4 = rt.InferenceSession(onx4.SerializeToString()) +sess4 = rt.InferenceSession( + onx4.SerializeToString(), providers=["CPUExecutionProvider"] +) res4 = sess4.run(None, {"X": X_test}) print(res4[1][:2]) print("probabilities type:", type(res4[1])) @@ -159,7 +165,9 @@ onx5 = to_onnx(clr, X_train, target_opset=12) -sess5 = rt.InferenceSession(onx5.SerializeToString()) +sess5 = rt.InferenceSession( + onx5.SerializeToString(), providers=["CPUExecutionProvider"] +) res5 = sess5.run(None, {"X": X_test[:3]}) print(res5) @@ -174,7 +182,9 @@ options={"zipmap": False, "output_class_labels": True}, ) -sess6 = rt.InferenceSession(onx6.SerializeToString()) +sess6 = rt.InferenceSession( + onx6.SerializeToString(), providers=["CPUExecutionProvider"] +) res6 = sess6.run(None, {"X": X_test[:3]}) print("predicted labels", res6[0]) print("predicted probabilies", res6[1]) diff --git a/docs/tutorial/plot_ebegin_float_double.py b/docs/tutorial/plot_ebegin_float_double.py index f61db244c..e3fc22dd0 100644 --- a/docs/tutorial/plot_ebegin_float_double.py +++ b/docs/tutorial/plot_ebegin_float_double.py @@ -147,7 +147,7 @@ def diff(p1, p2): onx = to_onnx(model, Xi_train[:1].astype(numpy.float32), target_opset=15) -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) X32 = Xi_test.astype(numpy.float32) @@ -207,7 +207,7 @@ def diff(p1, p2): onx2 = to_onnx(model2, Xi_train[:1].astype(numpy.float32), target_opset=15) -sess2 = InferenceSession(onx2.SerializeToString()) +sess2 = InferenceSession(onx2.SerializeToString(), providers=["CPUExecutionProvider"]) skl2 = model2.predict(X32) ort2 = sess2.run(None, {"X": X32})[0] @@ -238,7 +238,7 @@ def diff(p1, p2): target_opset=15, ) -sess3 = InferenceSession(onx3.SerializeToString()) +sess3 = InferenceSession(onx3.SerializeToString(), providers=["CPUExecutionProvider"]) skl3 = model3.predict(X32) ort3 = sess3.run(None, {"X": X32})[0] diff --git a/docs/tutorial/plot_gbegin_cst.py b/docs/tutorial/plot_gbegin_cst.py index 0b4e21c18..1f3a47902 100644 --- a/docs/tutorial/plot_gbegin_cst.py +++ b/docs/tutorial/plot_gbegin_cst.py @@ -52,7 +52,7 @@ # Inference # +++++++++ -sess = InferenceSession(new_onx.SerializeToString()) +sess = InferenceSession(new_onx.SerializeToString(), providers=["CPUExecutionProvider"]) print("output names:", [o.name for o in sess.get_outputs()]) res = sess.run(None, 
{"X": X_test[:2]}) print("outputs") @@ -73,7 +73,9 @@ simple_onx = select_model_inputs_outputs(new_onx, ["probabilities"]) -sess = InferenceSession(simple_onx.SerializeToString()) +sess = InferenceSession( + simple_onx.SerializeToString(), providers=["CPUExecutionProvider"] +) print("output names:", [o.name for o in sess.get_outputs()]) res = sess.run(None, {"X": X_test[:2]}) print("outputs") @@ -97,9 +99,9 @@ # ++++++++++++ -model = load("simplified_model.onnx", "wb") +model = load("simplified_model.onnx") -sess = InferenceSession(model.SerializeToString()) +sess = InferenceSession(model.SerializeToString(), providers=["CPUExecutionProvider"]) print("output names:", [o.name for o in sess.get_outputs()]) res = sess.run(None, {"X": X_test[:2]}) print("outputs") diff --git a/docs/tutorial/plot_gbegin_dataframe.py b/docs/tutorial/plot_gbegin_dataframe.py index d30dab6e7..39a199660 100644 --- a/docs/tutorial/plot_gbegin_dataframe.py +++ b/docs/tutorial/plot_gbegin_dataframe.py @@ -78,7 +78,7 @@ # *onnxruntime* does not support dataframes. -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) try: sess.run(None, train_data) except Exception as e: @@ -161,7 +161,7 @@ def guess_schema_from_data(X): ############################# # Inference. -sess2 = InferenceSession(onx2.SerializeToString()) +sess2 = InferenceSession(onx2.SerializeToString(), providers=["CPUExecutionProvider"]) got2 = sess2.run(None, inputs) diff --git a/docs/tutorial/plot_gconverting.py b/docs/tutorial/plot_gconverting.py index 3d2f41ab1..bf0cc91f6 100644 --- a/docs/tutorial/plot_gconverting.py +++ b/docs/tutorial/plot_gconverting.py @@ -30,7 +30,7 @@ onx = to_onnx(clr, X, options={"zipmap": False}, target_opset=15) -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) input_names = [i.name for i in sess.get_inputs()] output_names = [o.name for o in sess.get_outputs()] print("inputs=%r, outputs=%r" % (input_names, output_names)) @@ -53,7 +53,7 @@ target_opset=15, ) -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) input_names = [i.name for i in sess.get_inputs()] output_names = [o.name for o in sess.get_outputs()] print("inputs=%r, outputs=%r" % (input_names, output_names)) @@ -75,7 +75,7 @@ target_opset=15, ) -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) input_names = [i.name for i in sess.get_inputs()] output_names = [o.name for o in sess.get_outputs()] print("inputs=%r, outputs=%r" % (input_names, output_names)) @@ -100,7 +100,7 @@ def rename_results(proposed_name, existing_names): onx = to_onnx(clr, X, options={"zipmap": False}, naming=rename_results, target_opset=15) -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) input_names = [i.name for i in sess.get_inputs()] output_names = [o.name for o in sess.get_outputs()] print("inputs=%r, outputs=%r" % (input_names, output_names)) diff --git a/docs/tutorial/plot_gexternal_catboost.py b/docs/tutorial/plot_gexternal_catboost.py index d4ee99478..a9c6bd48f 100644 --- a/docs/tutorial/plot_gexternal_catboost.py +++ b/docs/tutorial/plot_gexternal_catboost.py @@ -159,7 +159,7 @@ def skl2onnx_convert_catboost(scope, operator, container): ########################## # 
Predictions with onnxruntime. -sess = rt.InferenceSession("pipeline_catboost.onnx") +sess = rt.InferenceSession("pipeline_catboost.onnx", providers=["CPUExecutionProvider"]) pred_onx = sess.run(None, {"input": X[:5].astype(numpy.float32)}) print("predict", pred_onx[0]) diff --git a/docs/tutorial/plot_gexternal_lightgbm.py b/docs/tutorial/plot_gexternal_lightgbm.py index 1149e93d5..30a3db914 100644 --- a/docs/tutorial/plot_gexternal_lightgbm.py +++ b/docs/tutorial/plot_gexternal_lightgbm.py @@ -96,7 +96,7 @@ ########################## # Predictions with onnxruntime. -sess = rt.InferenceSession("pipeline_lightgbm.onnx") +sess = rt.InferenceSession("pipeline_lightgbm.onnx", providers=["CPUExecutionProvider"]) pred_onx = sess.run(None, {"input": X[:5].astype(numpy.float32)}) print("predict", pred_onx[0]) diff --git a/docs/tutorial/plot_gexternal_lightgbm_reg.py b/docs/tutorial/plot_gexternal_lightgbm_reg.py index 2f243d002..19f4d4e03 100644 --- a/docs/tutorial/plot_gexternal_lightgbm_reg.py +++ b/docs/tutorial/plot_gexternal_lightgbm_reg.py @@ -121,8 +121,12 @@ def skl2onnx_convert_lightgbm(scope, operator, container): # Discrepancies # +++++++++++++ -sess = InferenceSession(model_onnx.SerializeToString()) -sess_split = InferenceSession(model_onnx_split.SerializeToString()) +sess = InferenceSession( + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] +) +sess_split = InferenceSession( + model_onnx_split.SerializeToString(), providers=["CPUExecutionProvider"] +) X32 = X.astype(numpy.float32) expected = reg.predict(X32) @@ -175,7 +179,9 @@ def skl2onnx_convert_lightgbm(scope, operator, container): target_opset={"": 14, "ai.onnx.ml": 2}, options={"split": i}, ) - sess_split = InferenceSession(model_onnx_split.SerializeToString()) + sess_split = InferenceSession( + model_onnx_split.SerializeToString(), providers=["CPUExecutionProvider"] + ) got_split = sess_split.run(None, {"X": X32})[0].ravel() disc_split = numpy.abs(got_split - expected).max() res.append(dict(split=i, disc=disc_split)) diff --git a/docs/tutorial/plot_gexternal_xgboost.py b/docs/tutorial/plot_gexternal_xgboost.py index 15ed13263..b689c780b 100644 --- a/docs/tutorial/plot_gexternal_xgboost.py +++ b/docs/tutorial/plot_gexternal_xgboost.py @@ -117,7 +117,7 @@ ########################## # Predictions with onnxruntime. 
-sess = rt.InferenceSession("pipeline_xgboost.onnx") +sess = rt.InferenceSession("pipeline_xgboost.onnx", providers=["CPUExecutionProvider"]) pred_onx = sess.run(None, {"input": X[:5].astype(numpy.float32)}) print("predict", pred_onx[0]) print("predict_proba", pred_onx[1][:1]) @@ -151,7 +151,7 @@ pipe, X_train.astype(numpy.float32), target_opset={"": 12, "ai.onnx.ml": 2} ) -sess = rt.InferenceSession(onx.SerializeToString()) +sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) pred_onx = sess.run(None, {"X": X_test[:5].astype(numpy.float32)}) print("predict", pred_onx[0].ravel()) @@ -187,7 +187,9 @@ cont = False if cont: - sess = rt.InferenceSession(onx.SerializeToString()) + sess = rt.InferenceSession( + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) input_name = sess.get_inputs()[0].name label_name = sess.get_outputs()[0].name pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0] diff --git a/docs/tutorial/plot_jcustom_syntax.py b/docs/tutorial/plot_jcustom_syntax.py index acdd6795e..e56fc8912 100644 --- a/docs/tutorial/plot_jcustom_syntax.py +++ b/docs/tutorial/plot_jcustom_syntax.py @@ -153,7 +153,7 @@ def decorrelate_transformer_converter(scope, operator, container): onx = to_onnx(dec, X.astype(numpy.float32)) -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) exp = dec.transform(X.astype(numpy.float32)) got = sess.run(None, {"X": X.astype(numpy.float32)})[0] @@ -173,7 +173,7 @@ def diff(p1, p2): onx = to_onnx(dec, X.astype(numpy.float64)) -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) exp = dec.transform(X.astype(numpy.float64)) got = sess.run(None, {"X": X.astype(numpy.float64)})[0] diff --git a/docs/tutorial/plot_kcustom_converter_wrapper.py b/docs/tutorial/plot_kcustom_converter_wrapper.py index dc9388bed..f93c35ab9 100644 --- a/docs/tutorial/plot_kcustom_converter_wrapper.py +++ b/docs/tutorial/plot_kcustom_converter_wrapper.py @@ -160,7 +160,7 @@ def decorrelate_transformer_converter(scope, operator, container): onx = to_onnx(dec, X.astype(numpy.float32)) -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) exp = dec.transform(X.astype(numpy.float32)) got = sess.run(None, {"X": X.astype(numpy.float32)})[0] @@ -180,7 +180,7 @@ def diff(p1, p2): onx = to_onnx(dec, X.astype(numpy.float64)) -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) exp = dec.transform(X.astype(numpy.float64)) got = sess.run(None, {"X": X.astype(numpy.float64)})[0] diff --git a/docs/tutorial/plot_lcustom_options.py b/docs/tutorial/plot_lcustom_options.py index 637f19793..805b26139 100644 --- a/docs/tutorial/plot_lcustom_options.py +++ b/docs/tutorial/plot_lcustom_options.py @@ -143,7 +143,7 @@ def decorrelate_transformer_converter(scope, operator, container): onx = to_onnx(dec, X.astype(numpy.float32)) -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) exp = dec.transform(X.astype(numpy.float32)) got = sess.run(None, {"X": X.astype(numpy.float32)})[0] @@ -163,7 +163,7 @@ def diff(p1, p2): onx2 = to_onnx(dec, X.astype(numpy.float32), options={"use_gemm": True}) -sess2 = InferenceSession(onx2.SerializeToString()) 
+sess2 = InferenceSession(onx2.SerializeToString(), providers=["CPUExecutionProvider"]) exp = dec.transform(X.astype(numpy.float32)) got2 = sess2.run(None, {"X": X.astype(numpy.float32)})[0] diff --git a/docs/tutorial/plot_mcustom_parser.py b/docs/tutorial/plot_mcustom_parser.py index 84dcd29f2..4ef837a75 100644 --- a/docs/tutorial/plot_mcustom_parser.py +++ b/docs/tutorial/plot_mcustom_parser.py @@ -161,7 +161,7 @@ def decorrelate_transformer_parser(scope, model, inputs, custom_parsers=None): onx = to_onnx(dec, X.astype(numpy.float32), target_opset=14) -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) exp = dec.transform(X.astype(numpy.float32)) results = sess.run(None, {"X": X.astype(numpy.float32)}) diff --git a/docs/tutorial/plot_ngrams.py b/docs/tutorial/plot_ngrams.py index 65d8540c9..e6ecb79bc 100644 --- a/docs/tutorial/plot_ngrams.py +++ b/docs/tutorial/plot_ngrams.py @@ -81,7 +81,7 @@ # declared. onx = to_onnx(mod2, corpus) -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) got = sess.run(None, {"X": corpus}) ################################### diff --git a/docs/tutorial/plot_transformer_discrepancy.py b/docs/tutorial/plot_transformer_discrepancy.py index f6c093957..ea9d7ae56 100644 --- a/docs/tutorial/plot_transformer_discrepancy.py +++ b/docs/tutorial/plot_transformer_discrepancy.py @@ -105,7 +105,7 @@ def diff(a, b): # Execution with ONNX # +++++++++++++++++++ -sess = InferenceSession(onx.SerializeToString()) +sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"]) got = sess.run(None, {"X": strings})[0] print(f"differences={diff(tr, got):g}") print(print_sparse_matrix(got)) diff --git a/docs/tutorial/plot_usparse_xgboost.py b/docs/tutorial/plot_usparse_xgboost.py index b8ae97aa7..8885b43b0 100644 --- a/docs/tutorial/plot_usparse_xgboost.py +++ b/docs/tutorial/plot_usparse_xgboost.py @@ -204,7 +204,9 @@ def make_pipelines( with open("model.onnx", "wb") as f: f.write(model_onnx.SerializeToString()) - sess = rt.InferenceSession(model_onnx.SerializeToString()) + sess = rt.InferenceSession( + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) inputs = { "input": df[["c0", "c1"]].values.astype(numpy.float32), "text": df[["text"]].values, diff --git a/docs/tutorial/plot_woe_transformer.py b/docs/tutorial/plot_woe_transformer.py index ba45ef414..ce93ee2ee 100644 --- a/docs/tutorial/plot_woe_transformer.py +++ b/docs/tutorial/plot_woe_transformer.py @@ -80,14 +80,14 @@ # # onehot=False onx1 = to_onnx(woe1, X) -sess = InferenceSession(onx1.SerializeToString()) +sess = InferenceSession(onx1.SerializeToString(), providers=["CPUExecutionProvider"]) print(sess.run(None, {"X": X})[0]) ################################## # onehot=True onx2 = to_onnx(woe2, X) -sess = InferenceSession(onx2.SerializeToString()) +sess = InferenceSession(onx2.SerializeToString(), providers=["CPUExecutionProvider"]) print(sess.run(None, {"X": X})[0]) ################################################ @@ -153,5 +153,5 @@ # And the conversion to ONNX using the same instruction. 
onxinf = to_onnx(woe1, X) -sess = InferenceSession(onxinf.SerializeToString()) +sess = InferenceSession(onxinf.SerializeToString(), providers=["CPUExecutionProvider"]) print(sess.run(None, {"X": X})[0]) diff --git a/skl2onnx/__init__.py b/skl2onnx/__init__.py index 8bb46e679..73210f047 100644 --- a/skl2onnx/__init__.py +++ b/skl2onnx/__init__.py @@ -3,7 +3,7 @@ """ Main entry point to the converter from the *scikit-learn* to *onnx*. """ -__version__ = "1.15.0" +__version__ = "1.16.0" __author__ = "Microsoft" __producer__ = "skl2onnx" __producer_version__ = __version__ diff --git a/skl2onnx/common/_topology.py b/skl2onnx/common/_topology.py index a5f5139a9..0a20c916b 100644 --- a/skl2onnx/common/_topology.py +++ b/skl2onnx/common/_topology.py @@ -976,9 +976,9 @@ def __init__( array types from CoreML. It's usually 1 or None. :param initial_types: A list providing some types for some - root variables. - Each element is a tuple of a variable name and a type defined - in *data_types.py*. + root variables. + Each element is a tuple of a variable name and a type defined + in *data_types.py*. :param custom_conversion_functions: a dictionary for specifying the user customized conversion function :param custom_shape_calculators: a dictionary for specifying the @@ -1001,12 +1001,12 @@ def __init__( for k in self.custom_conversion_functions: if not callable(k): raise TypeError( - "Keys in custom_conversion_functions must be " "types not strings." + "Keys in custom_conversion_functions must be types not strings." ) for k in self.custom_shape_calculators: if not callable(k): raise TypeError( - "Keys in custom_shape_calculators must be " "types not strings." + "Keys in custom_shape_calculators must be types not strings." ) # A map of local overwritten model aliases. @@ -1472,7 +1472,6 @@ def convert_topology( model_name, doc_string, target_opset, - channel_first_inputs=None, options=None, remove_identity=True, verbose=0, diff --git a/skl2onnx/operator_converters/text_vectoriser.py b/skl2onnx/operator_converters/text_vectoriser.py index 5d5ba7eba..b0244ba65 100644 --- a/skl2onnx/operator_converters/text_vectoriser.py +++ b/skl2onnx/operator_converters/text_vectoriser.py @@ -148,6 +148,10 @@ def convert_sklearn_text_vectorizer( These separators are used to split a string into words. Options *separators* is ignore if options *tokenexp* is not None. Default value: ``[' ', '[.]', '\\\\?', ',', ';', ':', '\\\\!']``. + locale: + The locale is not a parameter of the scikit-learn object. This option + can be used to change the value of parameter `locale` of the ONNX + operator `StringNormalizer`.
Example (from :ref:`l-example-tfidfvectorizer`): @@ -219,9 +223,15 @@ def convert_sklearn_text_vectorizer( options = container.get_options( op, - dict(separators="DEFAULT", tokenexp=None, nan=False, keep_empty_string=False), + dict( + separators="DEFAULT", + tokenexp=None, + nan=False, + keep_empty_string=False, + locale=None, + ), ) - if set(options) != {"separators", "tokenexp", "nan", "keep_empty_string"}: + if set(options) != {"separators", "tokenexp", "nan", "keep_empty_string", "locale"}: raise RuntimeError( "Unknown option {} for {}".format(set(options) - {"separators"}, type(op)) ) @@ -309,6 +319,8 @@ def convert_sklearn_text_vectorizer( ) op_version = 9 domain = "com.microsoft" + if options["locale"] is not None: + attrs["locale"] = options["locale"] opvs = 1 if domain == "com.microsoft" else op_version if stop_words: attrs["stopwords"] = list(sorted(stop_words)) @@ -535,5 +547,6 @@ def convert_sklearn_text_vectorizer( "separators": None, "nan": [True, False], "keep_empty_string": [True, False], + "locale": None, }, ) diff --git a/skl2onnx/operator_converters/tfidf_vectoriser.py b/skl2onnx/operator_converters/tfidf_vectoriser.py index a624b90b9..a3d2e9b44 100644 --- a/skl2onnx/operator_converters/tfidf_vectoriser.py +++ b/skl2onnx/operator_converters/tfidf_vectoriser.py @@ -56,5 +56,6 @@ def convert_sklearn_tfidf_vectoriser( "separators": None, "nan": [True, False], "keep_empty_string": [True, False], + "locale": None, }, ) diff --git a/skl2onnx/sklapi/sklearn_text_onnx.py b/skl2onnx/sklapi/sklearn_text_onnx.py index e3ae5e32b..def8ae544 100644 --- a/skl2onnx/sklapi/sklearn_text_onnx.py +++ b/skl2onnx/sklapi/sklearn_text_onnx.py @@ -22,6 +22,7 @@ def register(): "separators": None, "nan": [True, False], "keep_empty_string": [True, False], + "locale": None, }, ) @@ -35,5 +36,6 @@ def register(): "separators": None, "nan": [True, False], "keep_empty_string": [True, False], + "locale": None, }, ) diff --git a/tests/test_onnxruntime.py b/tests/test_onnxruntime.py index 28ec705ae..c40b915f4 100644 --- a/tests/test_onnxruntime.py +++ b/tests/test_onnxruntime.py @@ -2,6 +2,7 @@ import os import unittest +import packaging.version as pv import numpy as np from numpy.testing import assert_allclose @@ -10,7 +11,7 @@ from test_utils import ReferenceEvaluatorEx except ImportError: ReferenceEvaluatorEx = None -from onnxruntime import InferenceSession +from onnxruntime import InferenceSession, __version__ as ort_version class TestOnnxruntime(unittest.TestCase): @@ -87,6 +88,10 @@ class TestOnnxruntime(unittest.TestCase): ) @unittest.skipIf(ReferenceEvaluatorEx is None, "onnx too old") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.12.0"), + reason="ai.opset.ml==3 not implemented", + ) def test_tree_ensemble_classifier(self): """ The onnx graph was produced by the following code. 
@@ -127,6 +132,10 @@ def test_tree_ensemble_classifier(self): assert_allclose(labelo, label) @unittest.skipIf(ReferenceEvaluatorEx is None, "onnx too old") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.12.0"), + reason="ai.opset.ml==3 not implemented", + ) def test_tree_ensemble_classifier_2(self): X = self.X3_15 name = os.path.join(os.path.dirname(__file__), "datasets", "treecl2.onnx") @@ -138,6 +147,10 @@ def test_tree_ensemble_classifier_2(self): assert_allclose(labelo, label) @unittest.skipIf(ReferenceEvaluatorEx is None, "onnx too old") + @unittest.skipIf( + pv.Version(ort_version) < pv.Version("1.12.0"), + reason="ai.opset.ml==3 not implemented", + ) def test_tree_ensemble_classifier_3(self): X = self.X3_15[:, :10] name = os.path.join(os.path.dirname(__file__), "datasets", "treecl3.onnx") diff --git a/tests/test_sklearn_count_vectorizer_converter.py b/tests/test_sklearn_count_vectorizer_converter.py index eb7f1cc66..cf21218a4 100644 --- a/tests/test_sklearn_count_vectorizer_converter.py +++ b/tests/test_sklearn_count_vectorizer_converter.py @@ -4,6 +4,7 @@ Tests scikit-learn's CountVectorizer converter. """ import unittest +import sys import numpy from sklearn.feature_extraction.text import CountVectorizer from skl2onnx import convert_sklearn @@ -130,7 +131,39 @@ def test_model_count_vectorizer_binary(self): basename="SklearnCountVectorizerBinary-OneOff-SklCol", ) + @unittest.skipIf(TARGET_OPSET < 10, reason="not available") + def test_model_count_vectorizer11_locale(self): + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) + vect = CountVectorizer(ngram_range=(1, 1)) + vect.fit(corpus.ravel()) + locale = "en_US" + options = {CountVectorizer: {"locale": locale}} + model_onnx = convert_sklearn( + vect, + "CountVectorizer", + [("input", StringTensorType([1]))], + target_opset=TARGET_OPSET, + options=options, + ) + self.assertIn('name: "locale"', str(model_onnx)) + self.assertIn(f's: "{locale}"', str(model_onnx)) + self.assertTrue(model_onnx is not None) + if sys.platform == "win32": + # Linux fails due to misconfiguration with language-pack-en.
+ dump_data_and_model( + corpus, + vect, + model_onnx, + basename="SklearnCountVectorizer11Locale-OneOff-SklCol", + ) + if __name__ == "__main__": - TestSklearnCountVectorizer().test_model_count_vectorizer12() unittest.main(verbosity=2) diff --git a/tests/test_sklearn_glm_classifier_converter.py b/tests/test_sklearn_glm_classifier_converter.py index c34ffd3c6..e47396f3a 100644 --- a/tests/test_sklearn_glm_classifier_converter.py +++ b/tests/test_sklearn_glm_classifier_converter.py @@ -5,7 +5,7 @@ import numpy as np from numpy.testing import assert_almost_equal import sklearn -from sklearn import linear_model +from sklearn import linear_model, __version__ as sklearn_version from sklearn.svm import LinearSVC from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.exceptions import ConvergenceWarning @@ -33,6 +33,7 @@ ort_version = ort_version.split("+")[0] +skl_version = ".".join(sklearn_version.split(".")[:2]) def _sklearn_version(): @@ -509,6 +510,14 @@ def test_model_linear_svc_bool(self): self.assertIsNotNone(model_onnx) dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSVCBool") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("1.11.0"), + reason="onnxruntime not recent enough", + ) + @unittest.skipIf( + pv.Version(skl_version) <= pv.Version("1.1.0"), + reason="sklearn fails on windows", + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_binary(self): model, X = fit_classification_model(linear_model.RidgeClassifier(), 2) @@ -521,6 +530,14 @@ def test_model_ridge_classifier_binary(self): self.assertIsNotNone(model_onnx) dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeClassifierBin") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("1.11.0"), + reason="onnxruntime not recent enough", + ) + @unittest.skipIf( + pv.Version(skl_version) <= pv.Version("1.1.0"), + reason="sklearn fails on windows", + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_binary_nozipmap(self): model, X = fit_classification_model( @@ -560,6 +577,14 @@ def test_model_ridge_classifier_binary_nozipmap(self): X, model, model_onnx, basename="SklearnRidgeClassifierNZMBin" ) + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("1.11.0"), + reason="onnxruntime not recent enough", + ) + @unittest.skipIf( + pv.Version(skl_version) <= pv.Version("1.1.0"), + reason="sklearn fails on windows", + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_binary_mispelled_zipmap(self): model, X = fit_classification_model( @@ -579,6 +604,14 @@ def test_model_ridge_classifier_binary_mispelled_zipmap(self): except NameError as e: assert "Option 'zipmap ' not in" in str(e) + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("1.11.0"), + reason="onnxruntime not recent enough", + ) + @unittest.skipIf( + pv.Version(skl_version) <= pv.Version("1.1.0"), + reason="sklearn fails on windows", + ) @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning)) def test_model_ridge_classifier_binary_mispelled_zipmap_wrong_value(self): model, X = fit_classification_model( @@ -598,6 +631,14 @@ def test_model_ridge_classifier_binary_mispelled_zipmap_wrong_value(self): except ValueError as e: assert "Unexpected value ['True'] for option 'zipmap'" in str(e) + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("1.11.0"), + reason="onnxruntime not recent enough", + ) + @unittest.skipIf( + 
+        pv.Version(skl_version) <= pv.Version("1.1.0"),
+        reason="sklearn fails on windows",
+    )
     @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
     def test_model_ridge_classifier_multi_class(self):
         model, X = fit_classification_model(linear_model.RidgeClassifier(), 5)
@@ -612,6 +653,14 @@ def test_model_ridge_classifier_multi_class(self):
             X, model, model_onnx, basename="SklearnRidgeClassifierMulti"
         )
 
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("1.11.0"),
+        reason="onnxruntime not recent enough",
+    )
+    @unittest.skipIf(
+        pv.Version(skl_version) <= pv.Version("1.1.0"),
+        reason="sklearn fails on windows",
+    )
     @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
     def test_model_ridge_classifier_int(self):
         model, X = fit_classification_model(
@@ -626,6 +675,14 @@ def test_model_ridge_classifier_int(self):
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeClassifierInt")
 
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("1.11.0"),
+        reason="onnxruntime not recent enough",
+    )
+    @unittest.skipIf(
+        pv.Version(skl_version) <= pv.Version("1.1.0"),
+        reason="sklearn fails on windows",
+    )
     @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
     def test_model_ridge_classifier_bool(self):
         model, X = fit_classification_model(
diff --git a/tests/test_sklearn_glm_regressor_converter.py b/tests/test_sklearn_glm_regressor_converter.py
index c23ffaca5..063fa43fd 100644
--- a/tests/test_sklearn_glm_regressor_converter.py
+++ b/tests/test_sklearn_glm_regressor_converter.py
@@ -14,7 +14,7 @@
     # scikit-learn < 0.22
     from sklearn.utils.testing import ignore_warnings
 from sklearn.exceptions import ConvergenceWarning
-from sklearn import linear_model
+from sklearn import linear_model, __version__ as sklearn_version
 from sklearn.datasets import make_regression
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn.neural_network import MLPRegressor
@@ -52,6 +52,7 @@
 
 
 ort_version = ort_version.split("+")[0]
+skl_version = ".".join(sklearn_version.split(".")[:2])
 
 
 class TestGLMRegressorConverter(unittest.TestCase):
@@ -225,7 +226,15 @@ def test_model_linear_svr_bool(self):
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSVRBool")
 
-    @ignore_warnings(category=(FutureWarning, ConvergenceWarning))
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("1.11.0"),
+        reason="onnxruntime not recent enough",
+    )
+    @unittest.skipIf(
+        pv.Version(skl_version) <= pv.Version("1.1.0"),
+        reason="sklearn fails on windows",
+    )
+    @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
     def test_model_ridge(self):
         model, X = fit_regression_model(linear_model.Ridge())
         model_onnx = convert_sklearn(
@@ -237,7 +246,15 @@ def test_model_ridge(self):
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(X, model, model_onnx, basename="SklearnRidge-Dec4")
 
-    @ignore_warnings(category=(FutureWarning, ConvergenceWarning))
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("1.11.0"),
+        reason="onnxruntime not recent enough",
+    )
+    @unittest.skipIf(
+        pv.Version(skl_version) <= pv.Version("1.1.0"),
+        reason="sklearn fails on windows",
+    )
+    @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
     def test_model_ridge_int(self):
         model, X = fit_regression_model(linear_model.Ridge(), is_int=True)
         model_onnx = convert_sklearn(
@@ -249,7 +266,15 @@ def test_model_ridge_int(self):
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeInt-Dec4")
 
-    @ignore_warnings(category=(FutureWarning, ConvergenceWarning))
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("1.11.0"),
+        reason="onnxruntime not recent enough",
+    )
+    @unittest.skipIf(
+        pv.Version(skl_version) <= pv.Version("1.1.0"),
+        reason="sklearn fails on windows",
+    )
+    @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
     def test_model_ridge_bool(self):
         model, X = fit_regression_model(linear_model.Ridge(), is_bool=True)
         model_onnx = convert_sklearn(
diff --git a/tests/test_sklearn_imputer_converter.py b/tests/test_sklearn_imputer_converter.py
index c514651c9..e1841b945 100644
--- a/tests/test_sklearn_imputer_converter.py
+++ b/tests/test_sklearn_imputer_converter.py
@@ -21,6 +21,8 @@
     # changed in 0.20
     SimpleImputer = None
 
+from onnxruntime import __version__ as ort_version
+
 from skl2onnx import convert_sklearn
 from skl2onnx.common.data_types import (
     FloatTensorType,
@@ -35,6 +37,7 @@
 
 
 skl_ver = ".".join(sklearn.__version__.split(".")[:2])
+ort_version = ort_version.split("+")[0]
 
 
 class TestSklearnImputerConverter(unittest.TestCase):
@@ -113,6 +116,14 @@ def test_simple_imputer_float_inputs(self):
         )
 
     @unittest.skipIf(SimpleImputer is None, reason="SimpleImputer changed in 0.20")
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("1.11.0"),
+        reason="onnxruntime not recent enough",
+    )
+    @unittest.skipIf(
+        pv.Version(skl_ver) <= pv.Version("1.1.0"),
+        reason="sklearn fails on windows",
+    )
     def test_simple_imputer_float_inputs_int_mostf(self):
         model = SimpleImputer(strategy="most_frequent", fill_value="nan")
         data = [[1, 2], [np.nan, 3], [7, 6], [8, np.nan]]
diff --git a/tests/test_sklearn_multi_output.py b/tests/test_sklearn_multi_output.py
index 8c65c1220..a26cb58f2 100644
--- a/tests/test_sklearn_multi_output.py
+++ b/tests/test_sklearn_multi_output.py
@@ -15,11 +15,13 @@
 except ImportError:
     from sklearn.utils.testing import ignore_warnings
 from sklearn import __version__ as skl_ver
+from onnxruntime import __version__ as ort_version
 from skl2onnx import to_onnx
 from test_utils import dump_data_and_model, TARGET_OPSET
 
 
 skl_ver = ".".join(skl_ver.split(".")[:2])
+ort_version = ort_version.split("+")[0]
 
 
 class TestMultiOutputConverter(unittest.TestCase):
@@ -31,6 +33,14 @@ def setUp(self):
             # logging.basicConfig(level=logging.DEBUG)
         pass
 
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("1.11.0"),
+        reason="onnxruntime not recent enough",
+    )
+    @unittest.skipIf(
+        pv.Version(skl_ver) <= pv.Version("1.1.0"),
+        reason="sklearn fails on windows",
+    )
     def test_multi_output_regressor(self):
         X, y = load_linnerud(return_X_y=True)
         clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)
diff --git a/tests/test_sklearn_nearest_neighbour_converter.py b/tests/test_sklearn_nearest_neighbour_converter.py
index d924e5ba5..76d6b04d7 100644
--- a/tests/test_sklearn_nearest_neighbour_converter.py
+++ b/tests/test_sklearn_nearest_neighbour_converter.py
@@ -18,7 +18,7 @@
 except ImportError:
     # older versions of scikit-learn
     from sklearn.utils.testing import ignore_warnings
-from sklearn import datasets
+from sklearn import datasets, __version__ as sklearn_version
 from sklearn.model_selection import train_test_split
 from sklearn.neighbors import (
     KNeighborsRegressor,
@@ -67,6 +67,7 @@ def dont_test_radius():
 
 
 ort_version = ".".join(ort_version.split(".")[:2])
+skl_version = ".".join(sklearn_version.split(".")[:2])
 
 
 class TestNearestNeighbourConverter(unittest.TestCase):
@@ -795,6 +796,14 @@ def test_onnx_test_knn_transform(self):
         assert_almost_equal(ind, y[0])
 
     @unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
+    @unittest.skipIf(
+        pv.Version(ort_version) <= pv.Version("1.11.0"),
+        reason="onnxruntime not recent enough",
+    )
+    @unittest.skipIf(
+        pv.Version(skl_version) <= pv.Version("1.1.0"),
+        reason="sklearn fails on windows",
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_sklearn_nca_default(self):
         model, X_test = fit_classification_model(
@@ -810,6 +819,9 @@ def test_sklearn_nca_default(self):
         dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCADefault")
 
     @unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
+    @unittest.skipIf(
+        pv.Version(sklearn_version) < pv.Version("1.1.0"), reason="n-d not supported"
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_sklearn_nca_identity(self):
         model, X_test = fit_classification_model(
@@ -828,6 +840,9 @@ def test_sklearn_nca_identity(self):
         dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCAIdentity")
 
     @unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
+    @unittest.skipIf(
+        pv.Version(sklearn_version) < pv.Version("1.1.0"), reason="n-d not supported"
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_sklearn_nca_double(self):
         model, X_test = fit_classification_model(
@@ -845,6 +860,9 @@ def test_sklearn_nca_double(self):
         dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCADouble")
 
     @unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
+    @unittest.skipIf(
+        pv.Version(sklearn_version) < pv.Version("1.1.0"), reason="n-d not supported"
+    )
     @ignore_warnings(category=DeprecationWarning)
     def test_sklearn_nca_int(self):
         model, X_test = fit_classification_model(
diff --git a/tests/test_sklearn_power_transformer.py b/tests/test_sklearn_power_transformer.py
index 2f9d3141e..c8177b22b 100644
--- a/tests/test_sklearn_power_transformer.py
+++ b/tests/test_sklearn_power_transformer.py
@@ -4,6 +4,7 @@
 Test scikit-learn's PowerTransform
 """
 import unittest
+import packaging.version as pv
 
 import numpy as np
 
@@ -157,6 +158,9 @@ def test_powertransformer_box_cox_with_scaler(self):
         dump_data_and_model(data, model, model_onnx, basename="PowerTransformer")
 
     @unittest.skipIf(PowerTransformer is None, "Problems with import occurred")
+    @unittest.skipIf(
+        pv.Version(sklearn.__version__) < pv.Version("1.3.0"), reason="PR #26566"
+    )
     def test_powertransformer_zeros(self):
         pt = PowerTransformer()
         data = np.array([[0, 0], [0, 0]], dtype=np.float32)
diff --git a/tests/test_sklearn_random_forest_converters.py b/tests/test_sklearn_random_forest_converters.py
index d0ea0a6f4..c4b2f9db2 100644
--- a/tests/test_sklearn_random_forest_converters.py
+++ b/tests/test_sklearn_random_forest_converters.py
@@ -447,7 +447,7 @@ def common_test_model_hgb_classifier(self, add_nan=False, n_classes=2):
         self.assertIsNotNone(model_onnx)
         X_test = X_test.astype(numpy.float32)[:5]
 
-        # There is a bug in onnxruntime <= 1.1.0.
+        # There is a bug in onnxruntime <= 1.11.0.
         # Raw scores are always positive.
         dump_data_and_model(
             X_test,
diff --git a/tests/test_sklearn_tfidf_vectorizer_converter.py b/tests/test_sklearn_tfidf_vectorizer_converter.py
index 69c2eb445..f72f9080f 100644
--- a/tests/test_sklearn_tfidf_vectorizer_converter.py
+++ b/tests/test_sklearn_tfidf_vectorizer_converter.py
@@ -4,8 +4,9 @@
 Tests scikit-learn's tfidf converter.
""" import unittest -import packaging.version as pv import copy +import sys +import packaging.version as pv import numpy from numpy.testing import assert_almost_equal from sklearn.feature_extraction.text import TfidfVectorizer @@ -615,6 +616,48 @@ def test_model_tfidf_vectorizer11_custom_vocabulary(self): basename="SklearnTfidfVectorizer11CustomVocab-OneOff-SklCol", ) + @unittest.skipIf(TARGET_OPSET < 10, reason="not available") + @unittest.skipIf( + pv.Version(ort_version) <= pv.Version("0.3.0"), reason="Requires opset 9." + ) + def test_model_tfidf_vectorizer_locale(self): + corpus = numpy.array( + [ + "This is the first document.", + "This document is the second document.", + "And this is the third one.", + "Is this the first document?", + ] + ).reshape((4, 1)) + vect = TfidfVectorizer(ngram_range=(1, 1), norm=None) + vect.fit(corpus.ravel()) + locale = "en_US" + options = self.get_options() + options[TfidfVectorizer].update({"locale": locale}) + model_onnx = convert_sklearn( + vect, + "TfidfVectorizer", + [("input", StringTensorType())], + options=options, + target_opset=TARGET_OPSET, + ) + self.assertIn('name: "locale"', str(model_onnx)) + self.assertIn(f's: "{locale}"', str(model_onnx)) + if sys.platform == "win32": + # Linux fails due to misconfiguration with langage-pack-en. + dump_data_and_model( + corpus, + vect, + model_onnx, + basename="SklearnTfidfVectorizer11Locale-OneOff-SklCol", + ) + + sess = InferenceSession( + model_onnx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + res = sess.run(None, {"input": corpus.ravel()})[0] + assert res.shape == (4, 9) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py index a336ebf56..51665b005 100644 --- a/tests/test_utils/__init__.py +++ b/tests/test_utils/__init__.py @@ -81,8 +81,7 @@ def _get_ir_version(opv): def max_onnxruntime_opset(): """ See `Versioning.md - `_. + `_. """ vi = pv.Version(ort_version.split("+")[0]) if vi >= pv.Version("1.14.0"):