diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml
index 40d9ff887..c99ae8bb4 100644
--- a/.azure-pipelines/linux-conda-CI.yml
+++ b/.azure-pipelines/linux-conda-CI.yml
@@ -200,6 +200,12 @@ jobs:
environmentName: 'py$(python.version)'
packageSpecs: 'python=$(python.version)'
+ - script: |
+ sudo apt-get install -y language-pack-en
+ sudo locale-gen en_US.UTF-8
+ sudo update-locale LANG=en_US.UTF-8
+ displayName: 'Install language packs and configure en_US.UTF-8 locale'
+
- script: |
test '$(python.version)' == '3.7' && apt-get install protobuf-compiler libprotoc-dev
conda config --set always_yes yes --set changeps1 no
diff --git a/.gitignore b/.gitignore
index 377561ae0..4a5128a88 100644
--- a/.gitignore
+++ b/.gitignore
@@ -68,3 +68,4 @@ docs/tutorial/*.onnx
docs/tutorial/*.jpg
docs/tutorial/*.png
docs/tutorial/*.dot
+docs/tutorial/catboost_info
diff --git a/CHANGELOGS.md b/CHANGELOGS.md
new file mode 100644
index 000000000..929eda009
--- /dev/null
+++ b/CHANGELOGS.md
@@ -0,0 +1,8 @@
+Change Logs
+===========
+
+1.16.0
+------
+
+* add option 'locale' to the converters for CountVectorizer and TfidfVectorizer
+ [#1020](https://github.com/onnx/sklearn-onnx/pull/1020)
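For reference, a minimal sketch of how the new option can be passed, mirroring the unit test added later in this patch (the corpus and the option value are illustrative):

```python
# Convert a fitted CountVectorizer and forward the new 'locale' option;
# the converter writes it into the 'locale' attribute of the StringNormalizer node.
import numpy
from sklearn.feature_extraction.text import CountVectorizer
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import StringTensorType

corpus = numpy.array(["This is the first document.", "Is this the first document?"])
vect = CountVectorizer(ngram_range=(1, 1)).fit(corpus)
model_onnx = convert_sklearn(
    vect,
    "CountVectorizer",
    [("input", StringTensorType([1]))],
    options={CountVectorizer: {"locale": "en_US"}},
)
print('name: "locale"' in str(model_onnx))  # the attribute appears in the graph
```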
diff --git a/README.md b/README.md
index 6f79fd921..6ac428186 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,9 @@
-[![Build Status Linux](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status%2Fonnx.sklearn-onnx.linux.CI?branchName=refs%2Fpull%2F1009%2Fmerge)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=21&branchName=refs%2Fpull%2F1009%2Fmerge)
+[![Build Status Linux](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status%2Fonnx.sklearn-onnx.linux.CI?branchName=refs%2Fpull%2F1020%2Fmerge)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=21&branchName=refs%2Fpull%2F1020%2Fmerge)
-[![Build Status Windows](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status%2Fonnx.sklearn-onnx.win.CI?branchName=refs%2Fpull%2F1009%2Fmerge)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=22&branchName=refs%2Fpull%2F1009%2Fmerge)
+[![Build Status Windows](https://dev.azure.com/onnxmltools/sklearn-onnx/_apis/build/status%2Fonnx.sklearn-onnx.win.CI?branchName=refs%2Fpull%2F1020%2Fmerge)](https://dev.azure.com/onnxmltools/sklearn-onnx/_build/latest?definitionId=22&branchName=refs%2Fpull%2F1020%2Fmerge)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
@@ -33,6 +33,38 @@ Or you can install from the source with the latest changes.
pip install git+https://github.com/onnx/sklearn-onnx.git
```
+## Getting started
+
+```python
+# Train a model.
+import numpy as np
+from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+
+iris = load_iris()
+X, y = iris.data, iris.target
+X = X.astype(np.float32)
+X_train, X_test, y_train, y_test = train_test_split(X, y)
+clr = RandomForestClassifier()
+clr.fit(X_train, y_train)
+
+# Convert into ONNX format.
+from skl2onnx import to_onnx
+
+onx = to_onnx(clr, X[:1])
+with open("rf_iris.onnx", "wb") as f:
+ f.write(onx.SerializeToString())
+
+# Compute the prediction with onnxruntime.
+import onnxruntime as rt
+
+sess = rt.InferenceSession("rf_iris.onnx", providers=["CPUExecutionProvider"])
+input_name = sess.get_inputs()[0].name
+label_name = sess.get_outputs()[0].name
+pred_onx = sess.run([label_name], {input_name: X_test.astype(np.float32)})[0]
+```
+
## Contribute
We welcome contributions in the form of feedback, ideas, or code.
diff --git a/docs/api_summary.rst b/docs/api_summary.rst
index 16041defc..a8c713518 100644
--- a/docs/api_summary.rst
+++ b/docs/api_summary.rst
@@ -45,7 +45,6 @@ it is possible to enable logging:
import logging
logger = logging.getLogger('skl2onnx')
logger.setLevel(logging.DEBUG)
- logging.basicConfig(level=logging.DEBUG)
Example :ref:`l-example-logging` illustrates what it looks like.
diff --git a/docs/conf.py b/docs/conf.py
index a730d3b02..bda240463 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -5,6 +5,7 @@
import os
import sys
+import logging
import warnings
import skl2onnx
@@ -72,16 +73,14 @@
linkcode_resolve = make_linkcode_resolve(
"skl2onnx",
- "https://github.com/onnx/skl2onnx/blob/{revision}/" "{package}/{path}#L{lineno}",
+ "https://github.com/onnx/skl2onnx/blob/{revision}/{package}/{path}#L{lineno}",
)
intersphinx_mapping = {
"joblib": ("https://joblib.readthedocs.io/en/latest/", None),
"python": ("https://docs.python.org/{.major}".format(sys.version_info), None),
"matplotlib": ("https://matplotlib.org/", None),
- "mlinsights": ("http://www.xavierdupre.fr/app/mlinsights/helpsphinx/", None),
"numpy": ("https://docs.scipy.org/doc/numpy/", None),
- "pyquickhelper": ("http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/", None),
"onnxruntime": ("https://onnxruntime.ai/docs/api/python/", None),
"pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
"scipy": ("https://docs.scipy.org/doc/scipy/reference", None),
@@ -144,4 +143,14 @@
def setup(app):
# Placeholder to initialize the folder before
# generating the documentation.
+ logger = logging.getLogger("skl2onnx")
+ logger.setLevel(logging.WARNING)
+ logger = logging.getLogger("matplotlib.font_manager")
+ logger.setLevel(logging.WARNING)
+ logger = logging.getLogger("matplotlib.ticker")
+ logger.setLevel(logging.WARNING)
+ logger = logging.getLogger("PIL.PngImagePlugin")
+ logger.setLevel(logging.WARNING)
+ logger = logging.getLogger("graphviz._tools")
+ logger.setLevel(logging.WARNING)
return app
diff --git a/docs/examples/plot_convert_model.py b/docs/examples/plot_convert_model.py
index af00277ef..c4bb1e7c6 100644
--- a/docs/examples/plot_convert_model.py
+++ b/docs/examples/plot_convert_model.py
@@ -69,7 +69,7 @@
with open("logreg_iris.onnx", "wb") as f:
f.write(onx.SerializeToString())
-sess = rt.InferenceSession("logreg_iris.onnx")
+sess = rt.InferenceSession("logreg_iris.onnx", providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0]
diff --git a/docs/examples/plot_convert_syntax.py b/docs/examples/plot_convert_syntax.py
index bdff42218..d6f16ddbe 100644
--- a/docs/examples/plot_convert_syntax.py
+++ b/docs/examples/plot_convert_syntax.py
@@ -31,7 +31,7 @@
def predict_with_onnxruntime(onx, X):
- sess = InferenceSession(onx.SerializeToString())
+ sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
res = sess.run(None, {input_name: X.astype(np.float32)})
return res[0]
diff --git a/docs/examples/plot_convert_zipmap.py b/docs/examples/plot_convert_zipmap.py
index fbe48207b..e2a1f5385 100644
--- a/docs/examples/plot_convert_zipmap.py
+++ b/docs/examples/plot_convert_zipmap.py
@@ -48,7 +48,7 @@
# Let's confirm the output type of the probabilities
# is a list of dictionaries with onnxruntime.
-sess = rt.InferenceSession(onx.SerializeToString())
+sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
res = sess.run(None, {"float_input": X_test.astype(numpy.float32)})
print(res[1][:2])
print("probabilities type:", type(res[1]))
@@ -66,7 +66,9 @@
clr, initial_types=initial_type, options=options, target_opset=12
)
-sess2 = rt.InferenceSession(onx2.SerializeToString())
+sess2 = rt.InferenceSession(
+ onx2.SerializeToString(), providers=["CPUExecutionProvider"]
+)
res2 = sess2.run(None, {"float_input": X_test.astype(numpy.float32)})
print(res2[1][:2])
print("probabilities type:", type(res2[1]))
@@ -85,7 +87,9 @@
clr, initial_types=initial_type, options=options, target_opset=12
)
-sess3 = rt.InferenceSession(onx3.SerializeToString())
+sess3 = rt.InferenceSession(
+ onx3.SerializeToString(), providers=["CPUExecutionProvider"]
+)
res3 = sess3.run(None, {"float_input": X_test.astype(numpy.float32)})
for i, out in enumerate(sess3.get_outputs()):
print(
diff --git a/docs/examples/plot_custom_model.py b/docs/examples/plot_custom_model.py
index ac14d66c3..5a35811c0 100644
--- a/docs/examples/plot_custom_model.py
+++ b/docs/examples/plot_custom_model.py
@@ -410,7 +410,7 @@ def predictable_tsne_converter(scope, operator, container):
##########################
# Predictions with onnxruntime.
-sess = rt.InferenceSession("predictable_tsne.onnx")
+sess = rt.InferenceSession("predictable_tsne.onnx", providers=["CPUExecutionProvider"])
pred_onx = sess.run(None, {"input": X_test[:1].astype(numpy.float32)})
print("transform", pred_onx[0])
diff --git a/docs/examples/plot_custom_parser.py b/docs/examples/plot_custom_parser.py
index ecbe99f95..64102ab79 100644
--- a/docs/examples/plot_custom_parser.py
+++ b/docs/examples/plot_custom_parser.py
@@ -259,7 +259,9 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
X32 = X_test[:5].astype(np.float32)
-sess = rt.InferenceSession(model_onnx.SerializeToString())
+sess = rt.InferenceSession(
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+)
results = sess.run(None, {"X": X32})
print("--labels--")
diff --git a/docs/examples/plot_custom_parser_alternative.py b/docs/examples/plot_custom_parser_alternative.py
index 48b41a0b1..9a28f5103 100644
--- a/docs/examples/plot_custom_parser_alternative.py
+++ b/docs/examples/plot_custom_parser_alternative.py
@@ -236,7 +236,9 @@ def validator_classifier_parser(scope, model, inputs, custom_parsers=None):
X32 = X_test[:5].astype(np.float32)
-sess = rt.InferenceSession(model_onnx.SerializeToString())
+sess = rt.InferenceSession(
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+)
results = sess.run(None, {"X": X32})
print("--labels--")
diff --git a/docs/examples/plot_errors_onnxruntime.py b/docs/examples/plot_errors_onnxruntime.py
index 5e92fdd30..0d364d5d9 100644
--- a/docs/examples/plot_errors_onnxruntime.py
+++ b/docs/examples/plot_errors_onnxruntime.py
@@ -41,7 +41,7 @@
)
example2 = "logreg_iris.onnx"
-sess = rt.InferenceSession(example2)
+sess = rt.InferenceSession(example2, providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name
diff --git a/docs/examples/plot_gpr.py b/docs/examples/plot_gpr.py
index b38412ecd..c97064d21 100644
--- a/docs/examples/plot_gpr.py
+++ b/docs/examples/plot_gpr.py
@@ -51,7 +51,7 @@
initial_type = [("X", FloatTensorType([None, X_train.shape[1]]))]
onx = convert_sklearn(gpr, initial_types=initial_type, target_opset=12)
-sess = rt.InferenceSession(onx.SerializeToString())
+sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
try:
pred_onx = sess.run(None, {"X": X_test.astype(numpy.float32)})[0]
except RuntimeError as e:
@@ -74,7 +74,7 @@
initial_type = [("X", FloatTensorType([None, None]))]
onx = convert_sklearn(gpr, initial_types=initial_type, target_opset=12)
-sess = rt.InferenceSession(onx.SerializeToString())
+sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
pred_onx = sess.run(None, {"X": X_test.astype(numpy.float32)})[0]
pred_skl = gpr.predict(X_test)
@@ -111,7 +111,9 @@
initial_type = [("X", DoubleTensorType([None, None]))]
onx64 = convert_sklearn(gpr, initial_types=initial_type, target_opset=12)
-sess64 = rt.InferenceSession(onx64.SerializeToString())
+sess64 = rt.InferenceSession(
+ onx64.SerializeToString(), providers=["CPUExecutionProvider"]
+)
pred_onx64 = sess64.run(None, {"X": X_test})[0]
print(pred_onx64[0, :10])
@@ -169,7 +171,9 @@
gpr, initial_types=initial_type, options=options, target_opset=12
)
-sess64_std = rt.InferenceSession(onx64_std.SerializeToString())
+sess64_std = rt.InferenceSession(
+ onx64_std.SerializeToString(), providers=["CPUExecutionProvider"]
+)
pred_onx64_std = sess64_std.run(None, {"X": X_test[:5]})
pprint.pprint(pred_onx64_std)
diff --git a/docs/examples/plot_intermediate_outputs.py b/docs/examples/plot_intermediate_outputs.py
index 98937ce83..5c3b6493f 100644
--- a/docs/examples/plot_intermediate_outputs.py
+++ b/docs/examples/plot_intermediate_outputs.py
@@ -195,7 +195,7 @@ def convert_dataframe_schema(df, drop=None):
################################
# We are ready to run *onnxruntime*.
-sess = rt.InferenceSession("pipeline_titanic.onnx")
+sess = rt.InferenceSession("pipeline_titanic.onnx", providers=["CPUExecutionProvider"])
pred_onx = sess.run(None, inputs)
print("predict", pred_onx[0][:5])
print("predict_proba", pred_onx[1][:1])
@@ -228,7 +228,9 @@ def convert_dataframe_schema(df, drop=None):
################################
# Let's compute the numerical features.
-sess = rt.InferenceSession("pipeline_titanic_numerical.onnx")
+sess = rt.InferenceSession(
+ "pipeline_titanic_numerical.onnx", providers=["CPUExecutionProvider"]
+)
numX = sess.run(None, inputs)
print("numerical features", numX[0][:1])
@@ -238,7 +240,9 @@ def convert_dataframe_schema(df, drop=None):
print(model_onnx)
text_onnx = select_model_inputs_outputs(model_onnx, "variable2")
save_onnx_model(text_onnx, "pipeline_titanic_textual.onnx")
-sess = rt.InferenceSession("pipeline_titanic_textual.onnx")
+sess = rt.InferenceSession(
+ "pipeline_titanic_textual.onnx", providers=["CPUExecutionProvider"]
+)
numT = sess.run(None, inputs)
print("textual features", numT[0][:1])
diff --git a/docs/examples/plot_investigate_pipeline.py b/docs/examples/plot_investigate_pipeline.py
index b131d4fb8..2f532d139 100644
--- a/docs/examples/plot_investigate_pipeline.py
+++ b/docs/examples/plot_investigate_pipeline.py
@@ -55,7 +55,9 @@
initial_types = [("input", FloatTensorType((None, X_digits.shape[1])))]
model_onnx = convert_sklearn(pipe, initial_types=initial_types, target_opset=12)
-sess = rt.InferenceSession(model_onnx.SerializeToString())
+sess = rt.InferenceSession(
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+)
print("skl predict_proba")
print(pipe.predict_proba(X_digits[:2]))
onx_pred = sess.run(None, {"input": X_digits[:2].astype(np.float32)})[1]
@@ -82,7 +84,9 @@
for i, step in enumerate(steps):
onnx_step = step["onnx_step"]
- sess = rt.InferenceSession(onnx_step.SerializeToString())
+ sess = rt.InferenceSession(
+ onnx_step.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
onnx_outputs = sess.run(None, {"input": X_digits[:2].astype(np.float32)})
skl_outputs = step["model"]._debug.outputs
print("step 1", type(step["model"]))
diff --git a/docs/examples/plot_logging.py b/docs/examples/plot_logging.py
index 724d35657..39fd7ac03 100644
--- a/docs/examples/plot_logging.py
+++ b/docs/examples/plot_logging.py
@@ -46,7 +46,7 @@
onx = convert_sklearn(clr, initial_types=initial_type, target_opset=12)
-sess = rt.InferenceSession(onx.SerializeToString())
+sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0]
@@ -74,7 +74,6 @@
logger = logging.getLogger("skl2onnx")
logger.setLevel(logging.DEBUG)
-logging.basicConfig(level=logging.DEBUG)
convert_sklearn(clr, initial_types=initial_type, target_opset=12)
@@ -82,10 +81,10 @@
# And to disable it.
logger.setLevel(logging.INFO)
-logging.basicConfig(level=logging.INFO)
convert_sklearn(clr, initial_types=initial_type, target_opset=12)
+logger.setLevel(logging.WARNING)
#################################
# **Versions used for this example**
diff --git a/docs/examples/plot_metadata.py b/docs/examples/plot_metadata.py
index 3b3631164..fc0bde3c6 100644
--- a/docs/examples/plot_metadata.py
+++ b/docs/examples/plot_metadata.py
@@ -39,7 +39,7 @@
#############################
# With *ONNX Runtime*:
-sess = InferenceSession(example)
+sess = InferenceSession(example, providers=["CPUExecutionProvider"])
meta = sess.get_modelmeta()
print("custom_metadata_map={}".format(meta.custom_metadata_map))
diff --git a/docs/examples/plot_nmf.py b/docs/examples/plot_nmf.py
index 4b9be8605..89e8acb02 100644
--- a/docs/examples/plot_nmf.py
+++ b/docs/examples/plot_nmf.py
@@ -113,7 +113,9 @@ def nmf_to_onnx(W, H, op_version=12):
########################################
# Let's compute prediction with it.
-sess = InferenceSession(model_onnx.SerializeToString())
+sess = InferenceSession(
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+)
def predict_onnx(sess, row_indices, col_indices):
diff --git a/docs/examples/plot_onnx_operators.py b/docs/examples/plot_onnx_operators.py
index 9a72fd884..cd7d7af1b 100644
--- a/docs/examples/plot_onnx_operators.py
+++ b/docs/examples/plot_onnx_operators.py
@@ -153,7 +153,9 @@
def predict_with_onnxruntime(model_def, *inputs):
import onnxruntime as ort
- sess = ort.InferenceSession(model_def.SerializeToString())
+ sess = ort.InferenceSession(
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
names = [i.name for i in sess.get_inputs()]
dinputs = {name: input for name, input in zip(names, inputs)}
res = sess.run(None, dinputs)
diff --git a/docs/examples/plot_pipeline_lightgbm.py b/docs/examples/plot_pipeline_lightgbm.py
index fb584e5a1..fe89480a6 100644
--- a/docs/examples/plot_pipeline_lightgbm.py
+++ b/docs/examples/plot_pipeline_lightgbm.py
@@ -112,7 +112,9 @@
# Predictions with onnxruntime.
try:
- sess = rt.InferenceSession("pipeline_lightgbm.onnx")
+ sess = rt.InferenceSession(
+ "pipeline_lightgbm.onnx", providers=["CPUExecutionProvider"]
+ )
except OrtFail as e:
print(e)
print("The converter requires onnxmltools>=1.7.0")
diff --git a/docs/examples/plot_pipeline_xgboost.py b/docs/examples/plot_pipeline_xgboost.py
index 3feaa6d87..69e4ef4db 100644
--- a/docs/examples/plot_pipeline_xgboost.py
+++ b/docs/examples/plot_pipeline_xgboost.py
@@ -128,7 +128,7 @@
##########################
# Predictions with onnxruntime.
-sess = rt.InferenceSession("pipeline_xgboost.onnx")
+sess = rt.InferenceSession("pipeline_xgboost.onnx", providers=["CPUExecutionProvider"])
pred_onx = sess.run(None, {"input": X[:5].astype(numpy.float32)})
print("predict", pred_onx[0])
print("predict_proba", pred_onx[1][:1])
diff --git a/docs/examples/plot_tfidfvectorizer.py b/docs/examples/plot_tfidfvectorizer.py
index 290eb5e36..a4e849f7d 100644
--- a/docs/examples/plot_tfidfvectorizer.py
+++ b/docs/examples/plot_tfidfvectorizer.py
@@ -197,7 +197,7 @@ def transform(self, posts):
##########################
# Predictions with onnxruntime.
-sess = rt.InferenceSession("pipeline_tfidf.onnx")
+sess = rt.InferenceSession("pipeline_tfidf.onnx", providers=["CPUExecutionProvider"])
print("---", train_data[0])
inputs = {"input": train_data[:1]}
pred_onx = sess.run(None, inputs)
diff --git a/docs/index.rst b/docs/index.rst
index 02ba5ade9..bfff99e39 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -69,33 +69,36 @@ to automatically check every converter with
`onnxruntime-gpu `_.
Every converter is tested with this backend.
+**Getting started**
+
::
- # Train a model.
+ import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
+
iris = load_iris()
X, y = iris.data, iris.target
+ X = X.astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X, y)
clr = RandomForestClassifier()
clr.fit(X_train, y_train)
- # Convert into ONNX format
- from skl2onnx import convert_sklearn
- from skl2onnx.common.data_types import FloatTensorType
- initial_type = [('float_input', FloatTensorType([None, 4]))]
- onx = convert_sklearn(clr, initial_types=initial_type)
+ # Convert into ONNX format.
+ from skl2onnx import to_onnx
+
+ onx = to_onnx(clr, X[:1])
with open("rf_iris.onnx", "wb") as f:
f.write(onx.SerializeToString())
- # Compute the prediction with ONNX Runtime
+ # Compute the prediction with onnxruntime.
import onnxruntime as rt
- import numpy
+
sess = rt.InferenceSession("rf_iris.onnx", providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
- pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0]
+ pred_onx = sess.run([label_name], {input_name: X_test.astype(np.float32)})[0]
**Related converters**
diff --git a/docs/index_tutorial.rst b/docs/index_tutorial.rst
index 524ad0481..cb46f834b 100644
--- a/docs/index_tutorial.rst
+++ b/docs/index_tutorial.rst
@@ -25,6 +25,7 @@ The tutorial was tested with following version:
.. runpython::
:showcode:
+ import catboost
import numpy
import scipy
import sklearn
@@ -34,11 +35,10 @@ The tutorial was tested with following version:
import onnxruntime
import xgboost
import skl2onnx
- import pyquickhelper
- mods = [numpy, scipy, sklearn, lightgbm, xgboost,
+ mods = [numpy, scipy, sklearn, lightgbm, xgboost, catboost,
onnx, onnxmltools, onnxruntime,
- skl2onnx, pyquickhelper]
+ skl2onnx]
mods = [(m.__name__, m.__version__) for m in mods]
mx = max(len(_[0]) for _ in mods) + 1
for name, vers in sorted(mods):
diff --git a/docs/tutorial/plot_abegin_convert_pipeline.py b/docs/tutorial/plot_abegin_convert_pipeline.py
index 9596fe305..fae79bc22 100644
--- a/docs/tutorial/plot_abegin_convert_pipeline.py
+++ b/docs/tutorial/plot_abegin_convert_pipeline.py
@@ -65,7 +65,7 @@
#
# The first example uses :epkg:`onnxruntime`.
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
pred_ort = sess.run(None, {"X": X_test.astype(numpy.float32)})[0]
pred_skl = ereg.predict(X_test.astype(numpy.float32))
diff --git a/docs/tutorial/plot_bbegin_measure_time.py b/docs/tutorial/plot_bbegin_measure_time.py
index 823211506..3b6b83382 100644
--- a/docs/tutorial/plot_bbegin_measure_time.py
+++ b/docs/tutorial/plot_bbegin_measure_time.py
@@ -55,7 +55,7 @@
# We measure the processing time per observation whether
# or not an observation belongs to a batch or is a single one.
-sizes = [(1, 50), (10, 50), (1000, 10), (10000, 5)]
+sizes = [(1, 50), (10, 50), (100, 10)]
with config_context(assume_finite=True):
obs = []
diff --git a/docs/tutorial/plot_catwoe_transformer.py b/docs/tutorial/plot_catwoe_transformer.py
index bf9bed2fe..aca734bd0 100644
--- a/docs/tutorial/plot_catwoe_transformer.py
+++ b/docs/tutorial/plot_catwoe_transformer.py
@@ -131,7 +131,7 @@ def ordinal_encoder_converter(scope, operator, container):
ord_onx = to_onnx(enc, X[:1], target_opset=14)
-sess = InferenceSession(ord_onx.SerializeToString())
+sess = InferenceSession(ord_onx.SerializeToString(), providers=["CPUExecutionProvider"])
print(sess.run(None, {"X": X[:5]})[0])
######################################
@@ -232,5 +232,5 @@ def woe_encoder_converter(scope, operator, container):
woe_onx = to_onnx(woe, X[:1], target_opset=14)
-sess = InferenceSession(woe_onx.SerializeToString())
+sess = InferenceSession(woe_onx.SerializeToString(), providers=["CPUExecutionProvider"])
print(sess.run(None, {"X": X[:5]})[0])
diff --git a/docs/tutorial/plot_dbegin_options.py b/docs/tutorial/plot_dbegin_options.py
index 47b813f3a..e21a5df11 100644
--- a/docs/tutorial/plot_dbegin_options.py
+++ b/docs/tutorial/plot_dbegin_options.py
@@ -34,17 +34,18 @@
iris = load_iris()
X, y = iris.data, iris.target
X_train, _, y_train, __ = train_test_split(X, y, random_state=11)
- clr = LogisticRegression()
+ clr = LogisticRegression(max_iter=1000)
clr.fit(X_train, y_train)
model_def = to_onnx(clr, X_train.astype(numpy.float32))
- print(printable_graph(model_def))
+ print(printable_graph(model_def.graph))
This operator is not really efficient as it copies every probabilies and
labels in a different container. This time is usually significant for
small classifiers. Then it makes sense to remove it.
.. runpython::
+
import numpy
from onnx.helper import printable_graph
from sklearn.datasets import load_iris
@@ -55,12 +56,12 @@
iris = load_iris()
X, y = iris.data, iris.target
X_train, _, y_train, __ = train_test_split(X, y, random_state=11)
- clr = LogisticRegression()
+ clr = LogisticRegression(max_iter=1000)
clr.fit(X_train, y_train)
model_def = to_onnx(clr, X_train.astype(numpy.float32),
options={LogisticRegression: {'zipmap': False}})
- print(printable_graph(model_def))
+ print(printable_graph(model_def.graph))
There might be in the graph many classifiers, it is important to have
a way to specify which classifier should keep its *ZipMap*
@@ -199,7 +200,9 @@
X_train.astype(numpy.float32),
options={id(clrrf): {"decision_path": True, "zipmap": False}},
)
-sess = InferenceSession(model_def.SerializeToString())
+sess = InferenceSession(
+ model_def.SerializeToString(), providers=["CPUExecutionProvider"]
+)
##########################################
# The model produces 3 outputs.
diff --git a/docs/tutorial/plot_dbegin_options_zipmap.py b/docs/tutorial/plot_dbegin_options_zipmap.py
index d7eae7be8..2b71569c1 100644
--- a/docs/tutorial/plot_dbegin_options_zipmap.py
+++ b/docs/tutorial/plot_dbegin_options_zipmap.py
@@ -48,7 +48,7 @@
# The output type for the probabilities is a list of
# dictionaries.
-sess = rt.InferenceSession(onx.SerializeToString())
+sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
res = sess.run(None, {"X": X_test})
print(res[1][:2])
print("probabilities type:", type(res[1]))
@@ -64,7 +64,9 @@
options = {id(clr): {"zipmap": False}}
onx2 = to_onnx(clr, X_train, options=options, target_opset=12)
-sess2 = rt.InferenceSession(onx2.SerializeToString())
+sess2 = rt.InferenceSession(
+ onx2.SerializeToString(), providers=["CPUExecutionProvider"]
+)
res2 = sess2.run(None, {"X": X_test})
print(res2[1][:2])
print("probabilities type:", type(res2[1]))
@@ -81,7 +83,9 @@
options = {id(clr): {"zipmap": "columns"}}
onx3 = to_onnx(clr, X_train, options=options, target_opset=12)
-sess3 = rt.InferenceSession(onx3.SerializeToString())
+sess3 = rt.InferenceSession(
+ onx3.SerializeToString(), providers=["CPUExecutionProvider"]
+)
res3 = sess3.run(None, {"X": X_test})
for i, out in enumerate(sess3.get_outputs()):
print(
@@ -124,7 +128,9 @@
options = {id(clr): {"zipmap": False, "output_class_labels": True}}
onx4 = to_onnx(clr, X_train, options=options, target_opset=12)
-sess4 = rt.InferenceSession(onx4.SerializeToString())
+sess4 = rt.InferenceSession(
+ onx4.SerializeToString(), providers=["CPUExecutionProvider"]
+)
res4 = sess4.run(None, {"X": X_test})
print(res4[1][:2])
print("probabilities type:", type(res4[1]))
@@ -159,7 +165,9 @@
onx5 = to_onnx(clr, X_train, target_opset=12)
-sess5 = rt.InferenceSession(onx5.SerializeToString())
+sess5 = rt.InferenceSession(
+ onx5.SerializeToString(), providers=["CPUExecutionProvider"]
+)
res5 = sess5.run(None, {"X": X_test[:3]})
print(res5)
@@ -174,7 +182,9 @@
options={"zipmap": False, "output_class_labels": True},
)
-sess6 = rt.InferenceSession(onx6.SerializeToString())
+sess6 = rt.InferenceSession(
+ onx6.SerializeToString(), providers=["CPUExecutionProvider"]
+)
res6 = sess6.run(None, {"X": X_test[:3]})
print("predicted labels", res6[0])
print("predicted probabilies", res6[1])
diff --git a/docs/tutorial/plot_ebegin_float_double.py b/docs/tutorial/plot_ebegin_float_double.py
index f61db244c..e3fc22dd0 100644
--- a/docs/tutorial/plot_ebegin_float_double.py
+++ b/docs/tutorial/plot_ebegin_float_double.py
@@ -147,7 +147,7 @@ def diff(p1, p2):
onx = to_onnx(model, Xi_train[:1].astype(numpy.float32), target_opset=15)
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
X32 = Xi_test.astype(numpy.float32)
@@ -207,7 +207,7 @@ def diff(p1, p2):
onx2 = to_onnx(model2, Xi_train[:1].astype(numpy.float32), target_opset=15)
-sess2 = InferenceSession(onx2.SerializeToString())
+sess2 = InferenceSession(onx2.SerializeToString(), providers=["CPUExecutionProvider"])
skl2 = model2.predict(X32)
ort2 = sess2.run(None, {"X": X32})[0]
@@ -238,7 +238,7 @@ def diff(p1, p2):
target_opset=15,
)
-sess3 = InferenceSession(onx3.SerializeToString())
+sess3 = InferenceSession(onx3.SerializeToString(), providers=["CPUExecutionProvider"])
skl3 = model3.predict(X32)
ort3 = sess3.run(None, {"X": X32})[0]
diff --git a/docs/tutorial/plot_gbegin_cst.py b/docs/tutorial/plot_gbegin_cst.py
index 0b4e21c18..1f3a47902 100644
--- a/docs/tutorial/plot_gbegin_cst.py
+++ b/docs/tutorial/plot_gbegin_cst.py
@@ -52,7 +52,7 @@
# Inference
# +++++++++
-sess = InferenceSession(new_onx.SerializeToString())
+sess = InferenceSession(new_onx.SerializeToString(), providers=["CPUExecutionProvider"])
print("output names:", [o.name for o in sess.get_outputs()])
res = sess.run(None, {"X": X_test[:2]})
print("outputs")
@@ -73,7 +73,9 @@
simple_onx = select_model_inputs_outputs(new_onx, ["probabilities"])
-sess = InferenceSession(simple_onx.SerializeToString())
+sess = InferenceSession(
+ simple_onx.SerializeToString(), providers=["CPUExecutionProvider"]
+)
print("output names:", [o.name for o in sess.get_outputs()])
res = sess.run(None, {"X": X_test[:2]})
print("outputs")
@@ -97,9 +99,9 @@
# ++++++++++++
-model = load("simplified_model.onnx", "wb")
+model = load("simplified_model.onnx")
-sess = InferenceSession(model.SerializeToString())
+sess = InferenceSession(model.SerializeToString(), providers=["CPUExecutionProvider"])
print("output names:", [o.name for o in sess.get_outputs()])
res = sess.run(None, {"X": X_test[:2]})
print("outputs")
diff --git a/docs/tutorial/plot_gbegin_dataframe.py b/docs/tutorial/plot_gbegin_dataframe.py
index d30dab6e7..39a199660 100644
--- a/docs/tutorial/plot_gbegin_dataframe.py
+++ b/docs/tutorial/plot_gbegin_dataframe.py
@@ -78,7 +78,7 @@
# *onnxruntime* does not support dataframes.
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
try:
sess.run(None, train_data)
except Exception as e:
@@ -161,7 +161,7 @@ def guess_schema_from_data(X):
#############################
# Inference.
-sess2 = InferenceSession(onx2.SerializeToString())
+sess2 = InferenceSession(onx2.SerializeToString(), providers=["CPUExecutionProvider"])
got2 = sess2.run(None, inputs)
diff --git a/docs/tutorial/plot_gconverting.py b/docs/tutorial/plot_gconverting.py
index 3d2f41ab1..bf0cc91f6 100644
--- a/docs/tutorial/plot_gconverting.py
+++ b/docs/tutorial/plot_gconverting.py
@@ -30,7 +30,7 @@
onx = to_onnx(clr, X, options={"zipmap": False}, target_opset=15)
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
input_names = [i.name for i in sess.get_inputs()]
output_names = [o.name for o in sess.get_outputs()]
print("inputs=%r, outputs=%r" % (input_names, output_names))
@@ -53,7 +53,7 @@
target_opset=15,
)
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
input_names = [i.name for i in sess.get_inputs()]
output_names = [o.name for o in sess.get_outputs()]
print("inputs=%r, outputs=%r" % (input_names, output_names))
@@ -75,7 +75,7 @@
target_opset=15,
)
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
input_names = [i.name for i in sess.get_inputs()]
output_names = [o.name for o in sess.get_outputs()]
print("inputs=%r, outputs=%r" % (input_names, output_names))
@@ -100,7 +100,7 @@ def rename_results(proposed_name, existing_names):
onx = to_onnx(clr, X, options={"zipmap": False}, naming=rename_results, target_opset=15)
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
input_names = [i.name for i in sess.get_inputs()]
output_names = [o.name for o in sess.get_outputs()]
print("inputs=%r, outputs=%r" % (input_names, output_names))
diff --git a/docs/tutorial/plot_gexternal_catboost.py b/docs/tutorial/plot_gexternal_catboost.py
index d4ee99478..a9c6bd48f 100644
--- a/docs/tutorial/plot_gexternal_catboost.py
+++ b/docs/tutorial/plot_gexternal_catboost.py
@@ -159,7 +159,7 @@ def skl2onnx_convert_catboost(scope, operator, container):
##########################
# Predictions with onnxruntime.
-sess = rt.InferenceSession("pipeline_catboost.onnx")
+sess = rt.InferenceSession("pipeline_catboost.onnx", providers=["CPUExecutionProvider"])
pred_onx = sess.run(None, {"input": X[:5].astype(numpy.float32)})
print("predict", pred_onx[0])
diff --git a/docs/tutorial/plot_gexternal_lightgbm.py b/docs/tutorial/plot_gexternal_lightgbm.py
index 1149e93d5..30a3db914 100644
--- a/docs/tutorial/plot_gexternal_lightgbm.py
+++ b/docs/tutorial/plot_gexternal_lightgbm.py
@@ -96,7 +96,7 @@
##########################
# Predictions with onnxruntime.
-sess = rt.InferenceSession("pipeline_lightgbm.onnx")
+sess = rt.InferenceSession("pipeline_lightgbm.onnx", providers=["CPUExecutionProvider"])
pred_onx = sess.run(None, {"input": X[:5].astype(numpy.float32)})
print("predict", pred_onx[0])
diff --git a/docs/tutorial/plot_gexternal_lightgbm_reg.py b/docs/tutorial/plot_gexternal_lightgbm_reg.py
index 2f243d002..19f4d4e03 100644
--- a/docs/tutorial/plot_gexternal_lightgbm_reg.py
+++ b/docs/tutorial/plot_gexternal_lightgbm_reg.py
@@ -121,8 +121,12 @@ def skl2onnx_convert_lightgbm(scope, operator, container):
# Discrepancies
# +++++++++++++
-sess = InferenceSession(model_onnx.SerializeToString())
-sess_split = InferenceSession(model_onnx_split.SerializeToString())
+sess = InferenceSession(
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+)
+sess_split = InferenceSession(
+ model_onnx_split.SerializeToString(), providers=["CPUExecutionProvider"]
+)
X32 = X.astype(numpy.float32)
expected = reg.predict(X32)
@@ -175,7 +179,9 @@ def skl2onnx_convert_lightgbm(scope, operator, container):
target_opset={"": 14, "ai.onnx.ml": 2},
options={"split": i},
)
- sess_split = InferenceSession(model_onnx_split.SerializeToString())
+ sess_split = InferenceSession(
+ model_onnx_split.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
got_split = sess_split.run(None, {"X": X32})[0].ravel()
disc_split = numpy.abs(got_split - expected).max()
res.append(dict(split=i, disc=disc_split))
diff --git a/docs/tutorial/plot_gexternal_xgboost.py b/docs/tutorial/plot_gexternal_xgboost.py
index 15ed13263..b689c780b 100644
--- a/docs/tutorial/plot_gexternal_xgboost.py
+++ b/docs/tutorial/plot_gexternal_xgboost.py
@@ -117,7 +117,7 @@
##########################
# Predictions with onnxruntime.
-sess = rt.InferenceSession("pipeline_xgboost.onnx")
+sess = rt.InferenceSession("pipeline_xgboost.onnx", providers=["CPUExecutionProvider"])
pred_onx = sess.run(None, {"input": X[:5].astype(numpy.float32)})
print("predict", pred_onx[0])
print("predict_proba", pred_onx[1][:1])
@@ -151,7 +151,7 @@
pipe, X_train.astype(numpy.float32), target_opset={"": 12, "ai.onnx.ml": 2}
)
-sess = rt.InferenceSession(onx.SerializeToString())
+sess = rt.InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
pred_onx = sess.run(None, {"X": X_test[:5].astype(numpy.float32)})
print("predict", pred_onx[0].ravel())
@@ -187,7 +187,9 @@
cont = False
if cont:
- sess = rt.InferenceSession(onx.SerializeToString())
+ sess = rt.InferenceSession(
+ onx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
pred_onx = sess.run([label_name], {input_name: X_test.astype(numpy.float32)})[0]
diff --git a/docs/tutorial/plot_jcustom_syntax.py b/docs/tutorial/plot_jcustom_syntax.py
index acdd6795e..e56fc8912 100644
--- a/docs/tutorial/plot_jcustom_syntax.py
+++ b/docs/tutorial/plot_jcustom_syntax.py
@@ -153,7 +153,7 @@ def decorrelate_transformer_converter(scope, operator, container):
onx = to_onnx(dec, X.astype(numpy.float32))
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
exp = dec.transform(X.astype(numpy.float32))
got = sess.run(None, {"X": X.astype(numpy.float32)})[0]
@@ -173,7 +173,7 @@ def diff(p1, p2):
onx = to_onnx(dec, X.astype(numpy.float64))
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
exp = dec.transform(X.astype(numpy.float64))
got = sess.run(None, {"X": X.astype(numpy.float64)})[0]
diff --git a/docs/tutorial/plot_kcustom_converter_wrapper.py b/docs/tutorial/plot_kcustom_converter_wrapper.py
index dc9388bed..f93c35ab9 100644
--- a/docs/tutorial/plot_kcustom_converter_wrapper.py
+++ b/docs/tutorial/plot_kcustom_converter_wrapper.py
@@ -160,7 +160,7 @@ def decorrelate_transformer_converter(scope, operator, container):
onx = to_onnx(dec, X.astype(numpy.float32))
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
exp = dec.transform(X.astype(numpy.float32))
got = sess.run(None, {"X": X.astype(numpy.float32)})[0]
@@ -180,7 +180,7 @@ def diff(p1, p2):
onx = to_onnx(dec, X.astype(numpy.float64))
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
exp = dec.transform(X.astype(numpy.float64))
got = sess.run(None, {"X": X.astype(numpy.float64)})[0]
diff --git a/docs/tutorial/plot_lcustom_options.py b/docs/tutorial/plot_lcustom_options.py
index 637f19793..805b26139 100644
--- a/docs/tutorial/plot_lcustom_options.py
+++ b/docs/tutorial/plot_lcustom_options.py
@@ -143,7 +143,7 @@ def decorrelate_transformer_converter(scope, operator, container):
onx = to_onnx(dec, X.astype(numpy.float32))
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
exp = dec.transform(X.astype(numpy.float32))
got = sess.run(None, {"X": X.astype(numpy.float32)})[0]
@@ -163,7 +163,7 @@ def diff(p1, p2):
onx2 = to_onnx(dec, X.astype(numpy.float32), options={"use_gemm": True})
-sess2 = InferenceSession(onx2.SerializeToString())
+sess2 = InferenceSession(onx2.SerializeToString(), providers=["CPUExecutionProvider"])
exp = dec.transform(X.astype(numpy.float32))
got2 = sess2.run(None, {"X": X.astype(numpy.float32)})[0]
diff --git a/docs/tutorial/plot_mcustom_parser.py b/docs/tutorial/plot_mcustom_parser.py
index 84dcd29f2..4ef837a75 100644
--- a/docs/tutorial/plot_mcustom_parser.py
+++ b/docs/tutorial/plot_mcustom_parser.py
@@ -161,7 +161,7 @@ def decorrelate_transformer_parser(scope, model, inputs, custom_parsers=None):
onx = to_onnx(dec, X.astype(numpy.float32), target_opset=14)
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
exp = dec.transform(X.astype(numpy.float32))
results = sess.run(None, {"X": X.astype(numpy.float32)})
diff --git a/docs/tutorial/plot_ngrams.py b/docs/tutorial/plot_ngrams.py
index 65d8540c9..e6ecb79bc 100644
--- a/docs/tutorial/plot_ngrams.py
+++ b/docs/tutorial/plot_ngrams.py
@@ -81,7 +81,7 @@
# declared.
onx = to_onnx(mod2, corpus)
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
got = sess.run(None, {"X": corpus})
###################################
diff --git a/docs/tutorial/plot_transformer_discrepancy.py b/docs/tutorial/plot_transformer_discrepancy.py
index f6c093957..ea9d7ae56 100644
--- a/docs/tutorial/plot_transformer_discrepancy.py
+++ b/docs/tutorial/plot_transformer_discrepancy.py
@@ -105,7 +105,7 @@ def diff(a, b):
# Execution with ONNX
# +++++++++++++++++++
-sess = InferenceSession(onx.SerializeToString())
+sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
got = sess.run(None, {"X": strings})[0]
print(f"differences={diff(tr, got):g}")
print(print_sparse_matrix(got))
diff --git a/docs/tutorial/plot_usparse_xgboost.py b/docs/tutorial/plot_usparse_xgboost.py
index b8ae97aa7..8885b43b0 100644
--- a/docs/tutorial/plot_usparse_xgboost.py
+++ b/docs/tutorial/plot_usparse_xgboost.py
@@ -204,7 +204,9 @@ def make_pipelines(
with open("model.onnx", "wb") as f:
f.write(model_onnx.SerializeToString())
- sess = rt.InferenceSession(model_onnx.SerializeToString())
+ sess = rt.InferenceSession(
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
inputs = {
"input": df[["c0", "c1"]].values.astype(numpy.float32),
"text": df[["text"]].values,
diff --git a/docs/tutorial/plot_woe_transformer.py b/docs/tutorial/plot_woe_transformer.py
index ba45ef414..ce93ee2ee 100644
--- a/docs/tutorial/plot_woe_transformer.py
+++ b/docs/tutorial/plot_woe_transformer.py
@@ -80,14 +80,14 @@
#
# onehot=False
onx1 = to_onnx(woe1, X)
-sess = InferenceSession(onx1.SerializeToString())
+sess = InferenceSession(onx1.SerializeToString(), providers=["CPUExecutionProvider"])
print(sess.run(None, {"X": X})[0])
##################################
# onehot=True
onx2 = to_onnx(woe2, X)
-sess = InferenceSession(onx2.SerializeToString())
+sess = InferenceSession(onx2.SerializeToString(), providers=["CPUExecutionProvider"])
print(sess.run(None, {"X": X})[0])
################################################
@@ -153,5 +153,5 @@
# And the conversion to ONNX using the same instruction.
onxinf = to_onnx(woe1, X)
-sess = InferenceSession(onxinf.SerializeToString())
+sess = InferenceSession(onxinf.SerializeToString(), providers=["CPUExecutionProvider"])
print(sess.run(None, {"X": X})[0])
diff --git a/skl2onnx/__init__.py b/skl2onnx/__init__.py
index 8bb46e679..73210f047 100644
--- a/skl2onnx/__init__.py
+++ b/skl2onnx/__init__.py
@@ -3,7 +3,7 @@
"""
Main entry point to the converter from the *scikit-learn* to *onnx*.
"""
-__version__ = "1.15.0"
+__version__ = "1.16.0"
__author__ = "Microsoft"
__producer__ = "skl2onnx"
__producer_version__ = __version__
diff --git a/skl2onnx/common/_topology.py b/skl2onnx/common/_topology.py
index a5f5139a9..0a20c916b 100644
--- a/skl2onnx/common/_topology.py
+++ b/skl2onnx/common/_topology.py
@@ -976,9 +976,9 @@ def __init__(
array types from CoreML. It's usually
1 or None.
:param initial_types: A list providing some types for some
- root variables.
- Each element is a tuple of a variable name and a type defined
- in *data_types.py*.
+ root variables.
+ Each element is a tuple of a variable name and a type defined
+ in *data_types.py*.
:param custom_conversion_functions: a dictionary for specifying
the user customized conversion function
:param custom_shape_calculators: a dictionary for specifying the
@@ -1001,12 +1001,12 @@ def __init__(
for k in self.custom_conversion_functions:
if not callable(k):
raise TypeError(
- "Keys in custom_conversion_functions must be " "types not strings."
+ "Keys in custom_conversion_functions must be types not strings."
)
for k in self.custom_shape_calculators:
if not callable(k):
raise TypeError(
- "Keys in custom_shape_calculators must be " "types not strings."
+ "Keys in custom_shape_calculators must be types not strings."
)
# A map of local overwritten model aliases.
@@ -1472,7 +1472,6 @@ def convert_topology(
model_name,
doc_string,
target_opset,
- channel_first_inputs=None,
options=None,
remove_identity=True,
verbose=0,
diff --git a/skl2onnx/operator_converters/text_vectoriser.py b/skl2onnx/operator_converters/text_vectoriser.py
index 5d5ba7eba..b0244ba65 100644
--- a/skl2onnx/operator_converters/text_vectoriser.py
+++ b/skl2onnx/operator_converters/text_vectoriser.py
@@ -148,6 +148,10 @@ def convert_sklearn_text_vectorizer(
These separators are used to split a string into words.
Options *separators* is ignore if options *tokenexp* is not None.
Default value: ``[' ', '[.]', '\\\\?', ',', ';', ':', '\\\\!']``.
+ locale:
+ The locale is not exposed by the scikit-learn object. This option can be
+ used to change the value of the `locale` attribute of the ONNX operator
+ `StringNormalizer`.
Example (from :ref:`l-example-tfidfvectorizer`):
@@ -219,9 +223,15 @@ def convert_sklearn_text_vectorizer(
options = container.get_options(
op,
- dict(separators="DEFAULT", tokenexp=None, nan=False, keep_empty_string=False),
+ dict(
+ separators="DEFAULT",
+ tokenexp=None,
+ nan=False,
+ keep_empty_string=False,
+ locale=None,
+ ),
)
- if set(options) != {"separators", "tokenexp", "nan", "keep_empty_string"}:
+ if set(options) != {"separators", "tokenexp", "nan", "keep_empty_string", "locale"}:
raise RuntimeError(
"Unknown option {} for {}".format(set(options) - {"separators"}, type(op))
)
@@ -309,6 +319,8 @@ def convert_sklearn_text_vectorizer(
)
op_version = 9
domain = "com.microsoft"
+ if options["locale"] is not None:
+ attrs["locale"] = options["locale"]
opvs = 1 if domain == "com.microsoft" else op_version
if stop_words:
attrs["stopwords"] = list(sorted(stop_words))
@@ -535,5 +547,6 @@ def convert_sklearn_text_vectorizer(
"separators": None,
"nan": [True, False],
"keep_empty_string": [True, False],
+ "locale": None,
},
)
diff --git a/skl2onnx/operator_converters/tfidf_vectoriser.py b/skl2onnx/operator_converters/tfidf_vectoriser.py
index a624b90b9..a3d2e9b44 100644
--- a/skl2onnx/operator_converters/tfidf_vectoriser.py
+++ b/skl2onnx/operator_converters/tfidf_vectoriser.py
@@ -56,5 +56,6 @@ def convert_sklearn_tfidf_vectoriser(
"separators": None,
"nan": [True, False],
"keep_empty_string": [True, False],
+ "locale": None,
},
)
diff --git a/skl2onnx/sklapi/sklearn_text_onnx.py b/skl2onnx/sklapi/sklearn_text_onnx.py
index e3ae5e32b..def8ae544 100644
--- a/skl2onnx/sklapi/sklearn_text_onnx.py
+++ b/skl2onnx/sklapi/sklearn_text_onnx.py
@@ -22,6 +22,7 @@ def register():
"separators": None,
"nan": [True, False],
"keep_empty_string": [True, False],
+ "locale": None,
},
)
@@ -35,5 +36,6 @@ def register():
"separators": None,
"nan": [True, False],
"keep_empty_string": [True, False],
+ "locale": None,
},
)
diff --git a/tests/test_onnxruntime.py b/tests/test_onnxruntime.py
index 28ec705ae..c40b915f4 100644
--- a/tests/test_onnxruntime.py
+++ b/tests/test_onnxruntime.py
@@ -2,6 +2,7 @@
import os
import unittest
+import packaging.version as pv
import numpy as np
from numpy.testing import assert_allclose
@@ -10,7 +11,7 @@
from test_utils import ReferenceEvaluatorEx
except ImportError:
ReferenceEvaluatorEx = None
-from onnxruntime import InferenceSession
+from onnxruntime import InferenceSession, __version__ as ort_version
class TestOnnxruntime(unittest.TestCase):
@@ -87,6 +88,10 @@ class TestOnnxruntime(unittest.TestCase):
)
@unittest.skipIf(ReferenceEvaluatorEx is None, "onnx too old")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.12.0"),
+ reason="ai.opset.ml==3 not implemented",
+ )
def test_tree_ensemble_classifier(self):
"""
The onnx graph was produced by the following code.
@@ -127,6 +132,10 @@ def test_tree_ensemble_classifier(self):
assert_allclose(labelo, label)
@unittest.skipIf(ReferenceEvaluatorEx is None, "onnx too old")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.12.0"),
+ reason="ai.opset.ml==3 not implemented",
+ )
def test_tree_ensemble_classifier_2(self):
X = self.X3_15
name = os.path.join(os.path.dirname(__file__), "datasets", "treecl2.onnx")
@@ -138,6 +147,10 @@ def test_tree_ensemble_classifier_2(self):
assert_allclose(labelo, label)
@unittest.skipIf(ReferenceEvaluatorEx is None, "onnx too old")
+ @unittest.skipIf(
+ pv.Version(ort_version) < pv.Version("1.12.0"),
+ reason="ai.opset.ml==3 not implemented",
+ )
def test_tree_ensemble_classifier_3(self):
X = self.X3_15[:, :10]
name = os.path.join(os.path.dirname(__file__), "datasets", "treecl3.onnx")
diff --git a/tests/test_sklearn_count_vectorizer_converter.py b/tests/test_sklearn_count_vectorizer_converter.py
index eb7f1cc66..cf21218a4 100644
--- a/tests/test_sklearn_count_vectorizer_converter.py
+++ b/tests/test_sklearn_count_vectorizer_converter.py
@@ -4,6 +4,7 @@
Tests scikit-learn's CountVectorizer converter.
"""
import unittest
+import sys
import numpy
from sklearn.feature_extraction.text import CountVectorizer
from skl2onnx import convert_sklearn
@@ -130,7 +131,39 @@ def test_model_count_vectorizer_binary(self):
basename="SklearnCountVectorizerBinary-OneOff-SklCol",
)
+ @unittest.skipIf(TARGET_OPSET < 10, reason="not available")
+ def test_model_count_vectorizer11_locale(self):
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
+ vect = CountVectorizer(ngram_range=(1, 1))
+ vect.fit(corpus.ravel())
+ locale = "en_US"
+ options = {CountVectorizer: {"locale": locale}}
+ model_onnx = convert_sklearn(
+ vect,
+ "CountVectorizer",
+ [("input", StringTensorType([1]))],
+ target_opset=TARGET_OPSET,
+ options=options,
+ )
+ self.assertIn('name: "locale"', str(model_onnx))
+ self.assertIn(f's: "{locale}"', str(model_onnx))
+ self.assertTrue(model_onnx is not None)
+ if sys.platform == "win32":
+ # Linux fails due to a misconfiguration of language-pack-en.
+ dump_data_and_model(
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnCountVectorizer11Locale-OneOff-SklCol",
+ )
+
if __name__ == "__main__":
- TestSklearnCountVectorizer().test_model_count_vectorizer12()
unittest.main(verbosity=2)
diff --git a/tests/test_sklearn_glm_classifier_converter.py b/tests/test_sklearn_glm_classifier_converter.py
index c34ffd3c6..e47396f3a 100644
--- a/tests/test_sklearn_glm_classifier_converter.py
+++ b/tests/test_sklearn_glm_classifier_converter.py
@@ -5,7 +5,7 @@
import numpy as np
from numpy.testing import assert_almost_equal
import sklearn
-from sklearn import linear_model
+from sklearn import linear_model, __version__ as sklearn_version
from sklearn.svm import LinearSVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.exceptions import ConvergenceWarning
@@ -33,6 +33,7 @@
ort_version = ort_version.split("+")[0]
+skl_version = ".".join(sklearn_version.split(".")[:2])
def _sklearn_version():
@@ -509,6 +510,14 @@ def test_model_linear_svc_bool(self):
self.assertIsNotNone(model_onnx)
dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSVCBool")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_version) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_binary(self):
model, X = fit_classification_model(linear_model.RidgeClassifier(), 2)
@@ -521,6 +530,14 @@ def test_model_ridge_classifier_binary(self):
self.assertIsNotNone(model_onnx)
dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeClassifierBin")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_version) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_binary_nozipmap(self):
model, X = fit_classification_model(
@@ -560,6 +577,14 @@ def test_model_ridge_classifier_binary_nozipmap(self):
X, model, model_onnx, basename="SklearnRidgeClassifierNZMBin"
)
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_version) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_binary_mispelled_zipmap(self):
model, X = fit_classification_model(
@@ -579,6 +604,14 @@ def test_model_ridge_classifier_binary_mispelled_zipmap(self):
except NameError as e:
assert "Option 'zipmap ' not in" in str(e)
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_version) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_binary_mispelled_zipmap_wrong_value(self):
model, X = fit_classification_model(
@@ -598,6 +631,14 @@ def test_model_ridge_classifier_binary_mispelled_zipmap_wrong_value(self):
except ValueError as e:
assert "Unexpected value ['True'] for option 'zipmap'" in str(e)
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_version) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_multi_class(self):
model, X = fit_classification_model(linear_model.RidgeClassifier(), 5)
@@ -612,6 +653,14 @@ def test_model_ridge_classifier_multi_class(self):
X, model, model_onnx, basename="SklearnRidgeClassifierMulti"
)
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_version) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_int(self):
model, X = fit_classification_model(
@@ -626,6 +675,14 @@ def test_model_ridge_classifier_int(self):
self.assertIsNotNone(model_onnx)
dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeClassifierInt")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_version) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
@ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_classifier_bool(self):
model, X = fit_classification_model(
diff --git a/tests/test_sklearn_glm_regressor_converter.py b/tests/test_sklearn_glm_regressor_converter.py
index c23ffaca5..063fa43fd 100644
--- a/tests/test_sklearn_glm_regressor_converter.py
+++ b/tests/test_sklearn_glm_regressor_converter.py
@@ -14,7 +14,7 @@
# scikit-learn < 0.22
from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
-from sklearn import linear_model
+from sklearn import linear_model, __version__ as sklearn_version
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
@@ -52,6 +52,7 @@
ort_version = ort_version.split("+")[0]
+skl_version = ".".join(sklearn_version.split(".")[:2])
class TestGLMRegressorConverter(unittest.TestCase):
@@ -225,7 +226,15 @@ def test_model_linear_svr_bool(self):
self.assertIsNotNone(model_onnx)
dump_data_and_model(X, model, model_onnx, basename="SklearnLinearSVRBool")
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning))
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_version) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
+ @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge(self):
model, X = fit_regression_model(linear_model.Ridge())
model_onnx = convert_sklearn(
@@ -237,7 +246,15 @@ def test_model_ridge(self):
self.assertIsNotNone(model_onnx)
dump_data_and_model(X, model, model_onnx, basename="SklearnRidge-Dec4")
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning))
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_version) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
+ @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_int(self):
model, X = fit_regression_model(linear_model.Ridge(), is_int=True)
model_onnx = convert_sklearn(
@@ -249,7 +266,15 @@ def test_model_ridge_int(self):
self.assertIsNotNone(model_onnx)
dump_data_and_model(X, model, model_onnx, basename="SklearnRidgeInt-Dec4")
- @ignore_warnings(category=(FutureWarning, ConvergenceWarning))
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_version) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
+ @ignore_warnings(category=(DeprecationWarning, ConvergenceWarning))
def test_model_ridge_bool(self):
model, X = fit_regression_model(linear_model.Ridge(), is_bool=True)
model_onnx = convert_sklearn(
diff --git a/tests/test_sklearn_imputer_converter.py b/tests/test_sklearn_imputer_converter.py
index c514651c9..e1841b945 100644
--- a/tests/test_sklearn_imputer_converter.py
+++ b/tests/test_sklearn_imputer_converter.py
@@ -21,6 +21,8 @@
# changed in 0.20
SimpleImputer = None
+from onnxruntime import __version__ as ort_version
+
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import (
FloatTensorType,
@@ -35,6 +37,7 @@
skl_ver = ".".join(sklearn.__version__.split(".")[:2])
+ort_version = ort_version.split("+")[0]
class TestSklearnImputerConverter(unittest.TestCase):
@@ -113,6 +116,14 @@ def test_simple_imputer_float_inputs(self):
)
@unittest.skipIf(SimpleImputer is None, reason="SimpleImputer changed in 0.20")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_ver) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
def test_simple_imputer_float_inputs_int_mostf(self):
model = SimpleImputer(strategy="most_frequent", fill_value="nan")
data = [[1, 2], [np.nan, 3], [7, 6], [8, np.nan]]
diff --git a/tests/test_sklearn_multi_output.py b/tests/test_sklearn_multi_output.py
index 8c65c1220..a26cb58f2 100644
--- a/tests/test_sklearn_multi_output.py
+++ b/tests/test_sklearn_multi_output.py
@@ -15,11 +15,13 @@
except ImportError:
from sklearn.utils.testing import ignore_warnings
from sklearn import __version__ as skl_ver
+from onnxruntime import __version__ as ort_version
from skl2onnx import to_onnx
from test_utils import dump_data_and_model, TARGET_OPSET
skl_ver = ".".join(skl_ver.split(".")[:2])
+ort_version = ort_version.split("+")[0]
class TestMultiOutputConverter(unittest.TestCase):
@@ -31,6 +33,14 @@ def setUp(self):
# logging.basicConfig(level=logging.DEBUG)
pass
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_ver) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
def test_multi_output_regressor(self):
X, y = load_linnerud(return_X_y=True)
clf = MultiOutputRegressor(Ridge(random_state=123)).fit(X, y)
diff --git a/tests/test_sklearn_nearest_neighbour_converter.py b/tests/test_sklearn_nearest_neighbour_converter.py
index d924e5ba5..76d6b04d7 100644
--- a/tests/test_sklearn_nearest_neighbour_converter.py
+++ b/tests/test_sklearn_nearest_neighbour_converter.py
@@ -18,7 +18,7 @@
except ImportError:
# older versions of scikit-learn
from sklearn.utils.testing import ignore_warnings
-from sklearn import datasets
+from sklearn import datasets, __version__ as sklearn_version
from sklearn.model_selection import train_test_split
from sklearn.neighbors import (
KNeighborsRegressor,
@@ -67,6 +67,7 @@ def dont_test_radius():
ort_version = ".".join(ort_version.split(".")[:2])
+skl_version = ".".join(sklearn_version.split(".")[:2])
class TestNearestNeighbourConverter(unittest.TestCase):
@@ -795,6 +796,14 @@ def test_onnx_test_knn_transform(self):
assert_almost_equal(ind, y[0])
@unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("1.11.0"),
+ reason="onnxruntime not recent enough",
+ )
+ @unittest.skipIf(
+ pv.Version(skl_version) <= pv.Version("1.1.0"),
+ reason="sklearn fails on windows",
+ )
@ignore_warnings(category=DeprecationWarning)
def test_sklearn_nca_default(self):
model, X_test = fit_classification_model(
@@ -810,6 +819,9 @@ def test_sklearn_nca_default(self):
dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCADefault")
@unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
+ @unittest.skipIf(
+ pv.Version(sklearn_version) < pv.Version("1.1.0"), reason="n-d not supported"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_sklearn_nca_identity(self):
model, X_test = fit_classification_model(
@@ -828,6 +840,9 @@ def test_sklearn_nca_identity(self):
dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCAIdentity")
@unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
+ @unittest.skipIf(
+ pv.Version(sklearn_version) < pv.Version("1.1.0"), reason="n-d not supported"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_sklearn_nca_double(self):
model, X_test = fit_classification_model(
@@ -845,6 +860,9 @@ def test_sklearn_nca_double(self):
dump_data_and_model(X_test, model, model_onnx, basename="SklearnNCADouble")
@unittest.skipIf(NeighborhoodComponentsAnalysis is None, reason="new in 0.22")
+ @unittest.skipIf(
+ pv.Version(sklearn_version) < pv.Version("1.1.0"), reason="n-d not supported"
+ )
@ignore_warnings(category=DeprecationWarning)
def test_sklearn_nca_int(self):
model, X_test = fit_classification_model(
diff --git a/tests/test_sklearn_power_transformer.py b/tests/test_sklearn_power_transformer.py
index 2f9d3141e..c8177b22b 100644
--- a/tests/test_sklearn_power_transformer.py
+++ b/tests/test_sklearn_power_transformer.py
@@ -4,6 +4,7 @@
Test scikit-learn's PowerTransform
"""
import unittest
+import packaging.version as pv
import numpy as np
@@ -157,6 +158,9 @@ def test_powertransformer_box_cox_with_scaler(self):
dump_data_and_model(data, model, model_onnx, basename="PowerTransformer")
@unittest.skipIf(PowerTransformer is None, "Problems with import occurred")
+ @unittest.skipIf(
+ pv.Version(sklearn.__version__) < pv.Version("1.3.0"), reason="PR #26566"
+ )
def test_powertransformer_zeros(self):
pt = PowerTransformer()
data = np.array([[0, 0], [0, 0]], dtype=np.float32)
diff --git a/tests/test_sklearn_random_forest_converters.py b/tests/test_sklearn_random_forest_converters.py
index d0ea0a6f4..c4b2f9db2 100644
--- a/tests/test_sklearn_random_forest_converters.py
+++ b/tests/test_sklearn_random_forest_converters.py
@@ -447,7 +447,7 @@ def common_test_model_hgb_classifier(self, add_nan=False, n_classes=2):
self.assertIsNotNone(model_onnx)
X_test = X_test.astype(numpy.float32)[:5]
- # There is a bug in onnxruntime <= 1.1.0.
+ # There is a bug in onnxruntime <= 1.11.0.
# Raw scores are always positive.
dump_data_and_model(
X_test,
diff --git a/tests/test_sklearn_tfidf_vectorizer_converter.py b/tests/test_sklearn_tfidf_vectorizer_converter.py
index 69c2eb445..f72f9080f 100644
--- a/tests/test_sklearn_tfidf_vectorizer_converter.py
+++ b/tests/test_sklearn_tfidf_vectorizer_converter.py
@@ -4,8 +4,9 @@
Tests scikit-learn's tfidf converter.
"""
import unittest
-import packaging.version as pv
import copy
+import sys
+import packaging.version as pv
import numpy
from numpy.testing import assert_almost_equal
from sklearn.feature_extraction.text import TfidfVectorizer
@@ -615,6 +616,48 @@ def test_model_tfidf_vectorizer11_custom_vocabulary(self):
basename="SklearnTfidfVectorizer11CustomVocab-OneOff-SklCol",
)
+ @unittest.skipIf(TARGET_OPSET < 10, reason="not available")
+ @unittest.skipIf(
+ pv.Version(ort_version) <= pv.Version("0.3.0"), reason="Requires opset 9."
+ )
+ def test_model_tfidf_vectorizer_locale(self):
+ corpus = numpy.array(
+ [
+ "This is the first document.",
+ "This document is the second document.",
+ "And this is the third one.",
+ "Is this the first document?",
+ ]
+ ).reshape((4, 1))
+ vect = TfidfVectorizer(ngram_range=(1, 1), norm=None)
+ vect.fit(corpus.ravel())
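+ # The new 'locale' converter option should end up on the ONNX model as a
+ # string attribute named 'locale'; this is checked on the model's text form below.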
+ locale = "en_US"
+ options = self.get_options()
+ options[TfidfVectorizer].update({"locale": locale})
+ model_onnx = convert_sklearn(
+ vect,
+ "TfidfVectorizer",
+ [("input", StringTensorType())],
+ options=options,
+ target_opset=TARGET_OPSET,
+ )
+ self.assertIn('name: "locale"', str(model_onnx))
+ self.assertIn(f's: "{locale}"', str(model_onnx))
+ if sys.platform == "win32":
+ # Linux fails due to a misconfiguration of language-pack-en.
+ dump_data_and_model(
+ corpus,
+ vect,
+ model_onnx,
+ basename="SklearnTfidfVectorizer11Locale-OneOff-SklCol",
+ )
+
+ sess = InferenceSession(
+ model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+ )
+ res = sess.run(None, {"input": corpus.ravel()})[0]
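+ # 4 documents and a fitted vocabulary of 9 tokens -> a (4, 9) tf-idf matrix.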
+ assert res.shape == (4, 9)
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/test_utils/__init__.py b/tests/test_utils/__init__.py
index a336ebf56..51665b005 100644
--- a/tests/test_utils/__init__.py
+++ b/tests/test_utils/__init__.py
@@ -81,8 +81,7 @@ def _get_ir_version(opv):
def max_onnxruntime_opset():
"""
See `Versioning.md
- `_.
+ `_.
"""
vi = pv.Version(ort_version.split("+")[0])
if vi >= pv.Version("1.14.0"):