diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 5a96c230e..9c626c7fa 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -1200,9 +1200,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.16.2" +version = "0.16.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a378727d5fdcaafd15b5afe9842cff1c25fdc43f62a162ffda2263c57ad98703" +checksum = "0220c44442c9b239dd4357aa856ac468a4f5e1f0df19ddb89b2522952eb4c6ca" dependencies = [ "cfg-if", "indoc", @@ -1216,18 +1216,19 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.16.2" +version = "0.16.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fbb27a3e96edd34c13d97d0feefccc90a79270c577c66e19d95af8323823dfc" +checksum = "9c819d397859445928609d0ec5afc2da5204e0d0f73d6bf9e153b04e83c9cdc2" dependencies = [ "once_cell", + "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.16.2" +version = "0.16.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b719fff844bcf3f911132112ec06527eb195f6a98e0c42cf97e1118929fd4ea" +checksum = "ca882703ab55f54702d7bfe1189b41b0af10272389f04cae38fe4cd56c65f75f" dependencies = [ "libc", "pyo3-build-config", @@ -1235,9 +1236,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.16.2" +version = "0.16.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f795e52d3320abb349ca28b501a7112154a87f353fae1c811deecd58e99cfa9b" +checksum = "568749402955ad7be7bad9a09b8593851cd36e549ac90bfd44079cea500f3f21" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -1247,12 +1248,11 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.16.2" +version = "0.16.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e03aa57a3bb7b96982958088df38302a139df4eef54671bc595f26556cb75b" +checksum = "611f64e82d98f447787e82b8e7b0ebc681e1eb78fc1252668b2c605ffb4e1eb8" dependencies = [ "proc-macro2", - "pyo3-build-config", "quote", "syn", ] @@ -1611,6 +1611,12 @@ dependencies = [ "xattr", ] +[[package]] +name = "target-lexicon" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c02424087780c9b71cc96799eaeddff35af2bc513278cda5c99fc1f5d026d3c1" + [[package]] name = "tempfile" version = "3.3.0" @@ -1733,7 +1739,7 @@ dependencies = [ [[package]] name = "tokenizers-python" -version = "0.11.0" +version = "0.13.0" dependencies = [ "env_logger", "itertools 0.9.0", diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index cebc70e16..e9d9ef292 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tokenizers-python" -version = "0.11.0" +version = "0.13.0" authors = ["Anthony MOI "] edition = "2018" @@ -14,7 +14,7 @@ serde = { version = "1.0", features = [ "rc", "derive" ]} serde_json = "1.0" libc = "0.2" env_logger = "0.7.1" -pyo3 = "0.16.2" +pyo3 = { version = "0.16.2", features = ["extension-module"] } numpy = "0.16.2" ndarray = "0.13" onig = { version = "6.0", default-features = false } @@ -26,7 +26,7 @@ path = "../../tokenizers" [dev-dependencies] tempfile = "3.1" -pyo3 = { version = "0.16.2", features = ["auto-initialize"] } [features] -default = ["pyo3/extension-module"] +test = ["pyo3/auto-initialize"] + diff --git a/bindings/python/Makefile b/bindings/python/Makefile index 8fc5fafdf..ae588ed49 100644 --- a/bindings/python/Makefile +++ b/bindings/python/Makefile @@ -20,7 +20,7 @@ TESTS_RESOURCES = $(DATA_DIR)/small.txt $(DATA_DIR)/roberta.json test: $(TESTS_RESOURCES) pip install pytest requests setuptools_rust numpy pyarrow datasets python -m pytest -s -v tests - cargo test --no-default-features + cargo test --features test $(DATA_DIR)/big.txt : $(dir_guard) diff --git a/bindings/python/build-wheels.sh b/bindings/python/build-wheels.sh index 278992556..3efe91073 100755 --- a/bindings/python/build-wheels.sh +++ b/bindings/python/build-wheels.sh @@ -1,23 +1,28 @@ #!/bin/bash set -ex -curl https://sh.rustup.rs -sSf | sh -s -- -y +if ! command -v cargo &> /dev/null +then + curl https://sh.rustup.rs -sSf | sh -s -- -y +fi + export PATH="$HOME/.cargo/bin:$PATH" -for PYBIN in /opt/python/{cp37-cp37m,cp38-cp38,cp39-cp39,cp310-cp310}/bin; do +for PYBIN in /opt/python/cp{37,38,39,310}*/bin; do export PYTHON_SYS_EXECUTABLE="$PYBIN/python" - "${PYBIN}/pip" install -U setuptools-rust==0.11.3 + "${PYBIN}/pip" install -U setuptools-rust setuptools wheel "${PYBIN}/python" setup.py bdist_wheel rm -rf build/* done -for whl in dist/*.whl; do +for whl in ./dist/*.whl; do auditwheel repair "$whl" -w dist/ done # Keep only manylinux wheels -rm dist/*-linux_* +rm ./dist/*-linux_* + # Upload wheels /opt/python/cp37-cp37m/bin/pip install -U awscli diff --git a/bindings/python/py_src/tokenizers/__init__.py b/bindings/python/py_src/tokenizers/__init__.py index 3ae412e61..b78f02367 100644 --- a/bindings/python/py_src/tokenizers/__init__.py +++ b/bindings/python/py_src/tokenizers/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.13.0.dev0" +__version__ = "0.13.0" from typing import Tuple, Union, Tuple, List from enum import Enum diff --git a/bindings/python/setup.py b/bindings/python/setup.py index 649653c6b..7d523d23f 100644 --- a/bindings/python/setup.py +++ b/bindings/python/setup.py @@ -8,7 +8,7 @@ setup( name="tokenizers", - version="0.13.0.dev0", + version="0.13.0", description="Fast and Customizable Tokenizers", long_description=open("README.md", "r", encoding="utf-8").read(), long_description_content_type="text/markdown", diff --git a/bindings/python/test2.py b/bindings/python/test2.py deleted file mode 100644 index 69ce9f6ee..000000000 --- a/bindings/python/test2.py +++ /dev/null @@ -1,4 +0,0 @@ -from tokenizers import Tokenizer - - -tokenizer = Tokenizer.from_file("/home/nicolas/Downloads/tokenizer-wiki.txt")