diff --git a/recipe/patches/0001-Upgrade-pyo3-to-0.15.patch b/recipe/patches/0001-Upgrade-pyo3-to-0.15.patch index a631890..ad9ea70 100644 --- a/recipe/patches/0001-Upgrade-pyo3-to-0.15.patch +++ b/recipe/patches/0001-Upgrade-pyo3-to-0.15.patch @@ -1,47 +1,2228 @@ -From 00932743fa1f7eb4a9d7392ea86f9a82e3a19e5c Mon Sep 17 00:00:00 2001 +From e8cc2692f1a377621b645908a3e4d7088151d17f Mon Sep 17 00:00:00 2001 From: messense Date: Wed, 10 Mar 2021 10:45:50 +0800 Subject: [PATCH] Upgrade pyo3 to 0.15 Rebased-By: H. Vetinari --- - bindings/python/.cargo/config.toml | 11 + - bindings/python/Cargo.lock | 313 +++++++++---------- - bindings/python/Cargo.toml | 10 +- - bindings/python/src/decoders.rs | 26 +- - bindings/python/src/encoding.rs | 24 +- - bindings/python/src/error.rs | 2 +- - bindings/python/src/models.rs | 40 +-- - bindings/python/src/normalizers.rs | 54 ++-- - bindings/python/src/pre_tokenizers.rs | 52 +-- - bindings/python/src/processors.rs | 24 +- - bindings/python/src/token.rs | 2 +- - bindings/python/src/tokenizer.rs | 57 ++-- - bindings/python/src/trainers.rs | 16 +- - bindings/python/src/utils/iterators.rs | 2 +- - bindings/python/src/utils/normalization.rs | 40 +-- - bindings/python/src/utils/pretokenization.rs | 16 +- - bindings/python/src/utils/regex.rs | 4 +- - 17 files changed, 348 insertions(+), 345 deletions(-) - create mode 100644 bindings/python/.cargo/config.toml + 0001-Upgrade-pyo3-to-0.15.patch | 2192 ++++++++++++++++++ + bindings/python/Cargo.lock | 313 ++- + bindings/python/Cargo.toml | 10 +- + bindings/python/src/decoders.rs | 26 +- + bindings/python/src/encoding.rs | 24 +- + bindings/python/src/error.rs | 2 +- + bindings/python/src/models.rs | 40 +- + bindings/python/src/normalizers.rs | 54 +- + bindings/python/src/pre_tokenizers.rs | 52 +- + bindings/python/src/processors.rs | 24 +- + bindings/python/src/token.rs | 2 +- + bindings/python/src/tokenizer.rs | 57 +- + bindings/python/src/trainers.rs | 16 +- + bindings/python/src/utils/iterators.rs | 2 +- + bindings/python/src/utils/normalization.rs | 40 +- + bindings/python/src/utils/pretokenization.rs | 16 +- + bindings/python/src/utils/regex.rs | 4 +- + 17 files changed, 2529 insertions(+), 345 deletions(-) + create mode 100644 0001-Upgrade-pyo3-to-0.15.patch -diff --git a/bindings/python/.cargo/config.toml b/bindings/python/.cargo/config.toml +diff --git a/0001-Upgrade-pyo3-to-0.15.patch b/0001-Upgrade-pyo3-to-0.15.patch new file mode 100644 -index 0000000..d47f983 +index 0000000..a631890 --- /dev/null -+++ b/bindings/python/.cargo/config.toml -@@ -0,0 +1,11 @@ -+[target.x86_64-apple-darwin] -+rustflags = [ -+ "-C", "link-arg=-undefined", -+ "-C", "link-arg=dynamic_lookup", -+] ++++ b/0001-Upgrade-pyo3-to-0.15.patch +@@ -0,0 +1,2192 @@ ++From 00932743fa1f7eb4a9d7392ea86f9a82e3a19e5c Mon Sep 17 00:00:00 2001 ++From: messense ++Date: Wed, 10 Mar 2021 10:45:50 +0800 ++Subject: [PATCH] Upgrade pyo3 to 0.15 ++ ++Rebased-By: H. 
Vetinari ++--- ++ bindings/python/.cargo/config.toml | 11 + ++ bindings/python/Cargo.lock | 313 +++++++++---------- ++ bindings/python/Cargo.toml | 10 +- ++ bindings/python/src/decoders.rs | 26 +- ++ bindings/python/src/encoding.rs | 24 +- ++ bindings/python/src/error.rs | 2 +- ++ bindings/python/src/models.rs | 40 +-- ++ bindings/python/src/normalizers.rs | 54 ++-- ++ bindings/python/src/pre_tokenizers.rs | 52 +-- ++ bindings/python/src/processors.rs | 24 +- ++ bindings/python/src/token.rs | 2 +- ++ bindings/python/src/tokenizer.rs | 57 ++-- ++ bindings/python/src/trainers.rs | 16 +- ++ bindings/python/src/utils/iterators.rs | 2 +- ++ bindings/python/src/utils/normalization.rs | 40 +-- ++ bindings/python/src/utils/pretokenization.rs | 16 +- ++ bindings/python/src/utils/regex.rs | 4 +- ++ 17 files changed, 348 insertions(+), 345 deletions(-) ++ create mode 100644 bindings/python/.cargo/config.toml ++ ++diff --git a/bindings/python/.cargo/config.toml b/bindings/python/.cargo/config.toml ++new file mode 100644 ++index 0000000..d47f983 ++--- /dev/null +++++ b/bindings/python/.cargo/config.toml ++@@ -0,0 +1,11 @@ +++[target.x86_64-apple-darwin] +++rustflags = [ +++ "-C", "link-arg=-undefined", +++ "-C", "link-arg=dynamic_lookup", +++] +++ +++[target.aarch64-apple-darwin] +++rustflags = [ +++ "-C", "link-arg=-undefined", +++ "-C", "link-arg=dynamic_lookup", +++] ++diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock ++index 823f4a2..286cd68 100644 ++--- a/bindings/python/Cargo.lock +++++ b/bindings/python/Cargo.lock ++@@ -90,9 +90,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "bumpalo" ++-version = "3.7.0" +++version = "3.8.0" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "9c59e7af012c713f529e7a3ee57ce9b31ddd858d4b512923602f74608b009631" +++checksum = "8f1e260c3a9040a7c19a12468758f4c16f31a81a1fe087482be9570ec864bb6c" ++ ++ [[package]] ++ name = "byteorder" ++@@ -152,9 +152,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "cc" ++-version = "1.0.70" +++version = "1.0.71" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "d26a6ce4b6a484fa3edb70f7efa6fc430fd2b87285fe8b84304fd0936faa0dc0" +++checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd" ++ ++ [[package]] ++ name = "cfg-if" ++@@ -185,13 +185,13 @@ dependencies = [ ++ ++ [[package]] ++ name = "console" ++-version = "0.14.1" +++version = "0.15.0" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "3993e6445baa160675931ec041a5e03ca84b9c6e32a056150d3aa2bdda0a1f45" +++checksum = "a28b32d32ca44b70c3e4acd7db1babf555fa026e385fb95f18028f88848b3c31" ++ dependencies = [ ++ "encode_unicode", ++- "lazy_static", ++ "libc", +++ "once_cell", ++ "regex", ++ "terminal_size", ++ "unicode-width", ++@@ -200,9 +200,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "core-foundation" ++-version = "0.9.1" +++version = "0.9.2" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "0a89e2ae426ea83155dccf10c0fa6b1463ef6d5fcb44cee0b224a408fa640a62" +++checksum = "6888e10551bb93e424d8df1d07f1a8b4fceb0001a3a4b048bfc47554946f47b3" ++ dependencies = [ ++ "core-foundation-sys", ++ "libc", ++@@ -210,9 +210,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "core-foundation-sys" ++-version = "0.8.2" +++version = "0.8.3" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b" +++checksum = 
"5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" ++ ++ [[package]] ++ name = "cpufeatures" ++@@ -276,16 +276,6 @@ dependencies = [ ++ "lazy_static", ++ ] ++ ++-[[package]] ++-name = "ctor" ++-version = "0.1.21" ++-source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "ccc0a48a9b826acdf4028595adc9db92caea352f7af011a3034acd172a52a0aa" ++-dependencies = [ ++- "quote", ++- "syn", ++-] ++- ++ [[package]] ++ name = "darling" ++ version = "0.10.2" ++@@ -389,9 +379,9 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" ++ ++ [[package]] ++ name = "encoding_rs" ++-version = "0.8.28" +++version = "0.8.29" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065" +++checksum = "a74ea89a0a1b98f6332de42c95baff457ada66d1cb4030f9ff151b2041a1c746" ++ dependencies = [ ++ "cfg-if 1.0.0", ++ ] ++@@ -432,9 +422,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "flate2" ++-version = "1.0.21" +++version = "1.0.22" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "80edafed416a46fb378521624fab1cfa2eb514784fd8921adbe8a8d8321da811" +++checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" ++ dependencies = [ ++ "cfg-if 1.0.0", ++ "crc32fast", ++@@ -570,17 +560,6 @@ dependencies = [ ++ "wasi 0.10.2+wasi-snapshot-preview1", ++ ] ++ ++-[[package]] ++-name = "ghost" ++-version = "0.1.2" ++-source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "1a5bcf1bbeab73aa4cf2fde60a846858dc036163c7c33bec309f8d17de785479" ++-dependencies = [ ++- "proc-macro2", ++- "quote", ++- "syn", ++-] ++- ++ [[package]] ++ name = "glob" ++ version = "0.3.0" ++@@ -589,9 +568,9 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" ++ ++ [[package]] ++ name = "h2" ++-version = "0.3.4" +++version = "0.3.7" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "d7f3675cfef6a30c8031cf9e6493ebdc3bb3272a3fea3923c4210d1830e6a472" +++checksum = "7fd819562fcebdac5afc5c113c3ec36f902840b70fd4fc458799c8ce4607ae55" ++ dependencies = [ ++ "bytes", ++ "fnv", ++@@ -623,9 +602,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "http" ++-version = "0.2.4" +++version = "0.2.5" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "527e8c9ac747e28542699a951517aa9a6945af506cd1f2e1b53a576c17b6cc11" +++checksum = "1323096b05d41827dadeaee54c9981958c0f94e670bc94ed80037d1a7b8b186b" ++ dependencies = [ ++ "bytes", ++ "fnv", ++@@ -634,9 +613,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "http-body" ++-version = "0.4.3" +++version = "0.4.4" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "399c583b2979440c60be0821a6199eca73bc3c8dcd9d070d75ac726e2c6186e5" +++checksum = "1ff4f84919677303da5f147645dbea6b1881f368d03ac84e1dc09031ebd7b2c6" ++ dependencies = [ ++ "bytes", ++ "http", ++@@ -666,9 +645,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "hyper" ++-version = "0.14.12" +++version = "0.14.14" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "13f67199e765030fa08fe0bd581af683f0d5bc04ea09c2b1102012c5fb90e7fd" +++checksum = "2b91bb1f221b6ea1f1e4371216b70f40748774c2fb5971b450c07773fb92d26b" ++ dependencies = [ ++ "bytes", ++ "futures-channel", ++@@ -765,35 +744,13 @@ dependencies = [ ++ ++ [[package]] ++ name = "instant" ++-version = "0.1.10" +++version = "0.1.12" ++ source = 
"registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "bee0328b1209d157ef001c94dd85b4f8f64139adb0eac2659f4b08382b2f474d" +++checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" ++ dependencies = [ ++ "cfg-if 1.0.0", ++ ] ++ ++-[[package]] ++-name = "inventory" ++-version = "0.1.10" ++-source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "0f0f7efb804ec95e33db9ad49e4252f049e37e8b0a4652e3cd61f7999f2eff7f" ++-dependencies = [ ++- "ctor", ++- "ghost", ++- "inventory-impl", ++-] ++- ++-[[package]] ++-name = "inventory-impl" ++-version = "0.1.10" ++-source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "75c094e94816723ab936484666968f5b58060492e880f3c8d00489a1e244fa51" ++-dependencies = [ ++- "proc-macro2", ++- "quote", ++- "syn", ++-] ++- ++ [[package]] ++ name = "ipnet" ++ version = "2.3.1" ++@@ -826,9 +783,9 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" ++ ++ [[package]] ++ name = "js-sys" ++-version = "0.3.53" +++version = "0.3.55" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "e4bf49d50e2961077d9c99f4b7997d770a1114f087c3c2e0069b36c13fc2979d" +++checksum = "7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84" ++ dependencies = [ ++ "wasm-bindgen", ++ ] ++@@ -854,9 +811,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "libc" ++-version = "0.2.101" +++version = "0.2.107" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "3cb00336871be5ed2c8ed44b60ae9959dc5b9f08539422ed43f09e34ecaeba21" +++checksum = "fbe5e23404da5b4f555ef85ebed98fb4083e55a00c317800bc2a50ede9f3d219" ++ ++ [[package]] ++ name = "lock_api" ++@@ -891,6 +848,15 @@ dependencies = [ ++ "rawpointer", ++ ] ++ +++[[package]] +++name = "matrixmultiply" +++version = "0.3.1" +++source = "registry+https://github.com/rust-lang/crates.io-index" +++checksum = "5a8a15b776d9dfaecd44b03c5828c2199cddff5247215858aac14624f8d6b741" +++dependencies = [ +++ "rawpointer", +++] +++ ++ [[package]] ++ name = "memchr" ++ version = "2.3.4" ++@@ -924,9 +890,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "mio" ++-version = "0.7.13" +++version = "0.7.14" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "8c2bdb6314ec10835cd3293dd268473a835c02b7b352e788be788b3c6ca6bb16" +++checksum = "8067b404fe97c70829f082dec8bcf4f71225d7eaea1d8645349cb76fa06205cc" ++ dependencies = [ ++ "libc", ++ "log", ++@@ -968,8 +934,21 @@ version = "0.13.1" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++ checksum = "ac06db03ec2f46ee0ecdca1a1c34a99c0d188a0d83439b84bf0cb4b386e4ab09" ++ dependencies = [ ++- "matrixmultiply", ++- "num-complex", +++ "matrixmultiply 0.2.4", +++ "num-complex 0.2.4", +++ "num-integer", +++ "num-traits", +++ "rawpointer", +++] +++ +++[[package]] +++name = "ndarray" +++version = "0.15.3" +++source = "registry+https://github.com/rust-lang/crates.io-index" +++checksum = "08e854964160a323e65baa19a0b1a027f76d590faba01f05c0cbc3187221a8c9" +++dependencies = [ +++ "matrixmultiply 0.3.1", +++ "num-complex 0.4.0", ++ "num-integer", ++ "num-traits", ++ "rawpointer", ++@@ -1007,6 +986,15 @@ dependencies = [ ++ "num-traits", ++ ] ++ +++[[package]] +++name = "num-complex" +++version = "0.4.0" +++source = "registry+https://github.com/rust-lang/crates.io-index" +++checksum = "26873667bbbb7c5182d4a37c1add32cdf09f841af72da53318fdb81543c15085" +++dependencies = [ +++ "num-traits", +++] +++ ++ [[package]] ++ name = 
"num-integer" ++ version = "0.1.44" ++@@ -1044,14 +1032,14 @@ checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a" ++ ++ [[package]] ++ name = "numpy" ++-version = "0.12.2" +++version = "0.15.0" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "9fd9e8e652becf4ba6c11803945f8bf463c23f482f704bb33f70ae9d22482d10" +++checksum = "e590538dba8432d54d3587b06df73d7c044e83cfa4b200cbc7d0567f924ac0a7" ++ dependencies = [ ++ "cfg-if 0.1.10", ++ "libc", ++- "ndarray", ++- "num-complex", +++ "ndarray 0.15.3", +++ "num-complex 0.4.0", ++ "num-traits", ++ "pyo3", ++ ] ++@@ -1064,9 +1052,9 @@ checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" ++ ++ [[package]] ++ name = "onig" ++-version = "6.2.0" +++version = "6.3.1" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "b16fd3c0e73b516af509c13c4ba76ec0c987ce20d78b38cff356b8d01fc6a6c0" +++checksum = "67ddfe2c93bb389eea6e6d713306880c7f6dcc99a75b659ce145d962c861b225" ++ dependencies = [ ++ "bitflags", ++ "lazy_static", ++@@ -1076,9 +1064,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "onig_sys" ++-version = "69.7.0" +++version = "69.7.1" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "9fd9442a09e4fbd08d196ddf419b2c79a43c3a46c800320cc841d45c2449a240" +++checksum = "5dd3eee045c84695b53b20255bb7317063df090b68e18bfac0abb6c39cf7f33e" ++ dependencies = [ ++ "cc", ++ "pkg-config", ++@@ -1092,9 +1080,9 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" ++ ++ [[package]] ++ name = "openssl" ++-version = "0.10.36" +++version = "0.10.38" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "8d9facdb76fec0b73c406f125d44d86fdad818d66fef0531eec9233ca425ff4a" +++checksum = "0c7ae222234c30df141154f159066c5093ff73b63204dcda7121eb082fc56a95" ++ dependencies = [ ++ "bitflags", ++ "cfg-if 1.0.0", ++@@ -1112,9 +1100,9 @@ checksum = "28988d872ab76095a6e6ac88d99b54fd267702734fd7ffe610ca27f533ddb95a" ++ ++ [[package]] ++ name = "openssl-sys" ++-version = "0.9.66" +++version = "0.9.70" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "1996d2d305e561b70d1ee0c53f1542833f4e1ac6ce9a6708b6ff2738ca67dc82" +++checksum = "c6517987b3f8226b5da3661dad65ff7f300cc59fb5ea8333ca191fc65fde3edf" ++ dependencies = [ ++ "autocfg", ++ "cc", ++@@ -1193,15 +1181,15 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" ++ ++ [[package]] ++ name = "pkg-config" ++-version = "0.3.19" +++version = "0.3.22" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" +++checksum = "12295df4f294471248581bc09bef3c38a5e46f1e36d6a37353621a0c6c357e1f" ++ ++ [[package]] ++ name = "ppv-lite86" ++-version = "0.2.10" +++version = "0.2.15" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" +++checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba" ++ ++ [[package]] ++ name = "proc-macro-hack" ++@@ -1211,9 +1199,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" ++ ++ [[package]] ++ name = "proc-macro2" ++-version = "1.0.29" +++version = "1.0.32" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "b9f5105d4fdaab20335ca9565e106a5d9b82b6219b5ba735731124ac6711d23d" +++checksum = 
"ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43" ++ dependencies = [ ++ "unicode-xid", ++ ] ++@@ -1224,38 +1212,48 @@ version = "0.1.0" ++ ++ [[package]] ++ name = "pyo3" ++-version = "0.12.4" +++version = "0.15.0" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "bf6bbbe8f70d179260b3728e5d04eb012f4f0c7988e58c11433dd689cecaa72e" +++checksum = "64664505ce285a59b8b7e940fbe54ad65b1758a0810eddc5bc26df6f6ec8c557" ++ dependencies = [ ++- "ctor", +++ "cfg-if 1.0.0", ++ "indoc", ++- "inventory", ++ "libc", ++ "parking_lot", ++ "paste 0.1.18", ++- "pyo3cls", +++ "pyo3-build-config", +++ "pyo3-macros", ++ "unindent", ++ ] ++ ++ [[package]] ++-name = "pyo3-derive-backend" ++-version = "0.12.4" +++name = "pyo3-build-config" +++version = "0.15.0" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "10ecd0eb6ed7b3d9965b4f4370b5b9e99e3e5e8742000e1c452c018f8c2a322f" +++checksum = "5f1e4a72de84cdcd69f62211b62f62753d0c11b7b5715f3467f3754dab22a7ca" ++ dependencies = [ ++- "proc-macro2", +++ "once_cell", +++] +++ +++[[package]] +++name = "pyo3-macros" +++version = "0.15.0" +++source = "registry+https://github.com/rust-lang/crates.io-index" +++checksum = "244f21d0a3887a9c02018b94e3b78d693dc7eca5c56839b7796a499cc364deb4" +++dependencies = [ +++ "pyo3-macros-backend", ++ "quote", ++ "syn", ++ ] ++ ++ [[package]] ++-name = "pyo3cls" ++-version = "0.12.4" +++name = "pyo3-macros-backend" +++version = "0.15.0" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "d344fdaa6a834a06dd1720ff104ea12fe101dad2e8db89345af9db74c0bb11a0" +++checksum = "b3d3d18ac41d05199bb82645d56e39f8c8b4909a0137c6f2640f03685b29f672" ++ dependencies = [ ++- "pyo3-derive-backend", +++ "proc-macro2", +++ "pyo3-build-config", ++ "quote", ++ "syn", ++ ] ++@@ -1268,9 +1266,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" ++ ++ [[package]] ++ name = "quote" ++-version = "1.0.9" +++version = "1.0.10" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +++checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" ++ dependencies = [ ++ "proc-macro2", ++ ] ++@@ -1451,9 +1449,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "reqwest" ++-version = "0.11.4" +++version = "0.11.6" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "246e9f61b9bb77df069a947682be06e31ac43ea37862e244a69f177694ea6d22" +++checksum = "66d2927ca2f685faf0fc620ac4834690d29e7abb153add10f5812eef20b5e280" ++ dependencies = [ ++ "base64 0.13.0", ++ "bytes", ++@@ -1473,6 +1471,7 @@ dependencies = [ ++ "percent-encoding", ++ "pin-project-lite", ++ "serde", +++ "serde_json", ++ "serde_urlencoded", ++ "tokio", ++ "tokio-native-tls", ++@@ -1550,9 +1549,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "serde_json" ++-version = "1.0.67" +++version = "1.0.69" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "a7f9e390c27c3c0ce8bc5d725f6e4d30a29d26659494aa4b17535f7522c5c950" +++checksum = "e466864e431129c7e0d3476b92f20458e5879919a0596c6472738d9fa2d342f8" ++ dependencies = [ ++ "itoa", ++ "ryu", ++@@ -1573,9 +1572,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "sha2" ++-version = "0.9.6" +++version = "0.9.8" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "9204c41a1597a8c5af23c82d1c921cb01ec0a4c59e07a9c7306062829a3903f3" +++checksum = 
"b69f9a4c9740d74c5baa3fd2e547f9525fa8088a8a958e0ca2409a514e33f5fa" ++ dependencies = [ ++ "block-buffer", ++ "cfg-if 1.0.0", ++@@ -1586,21 +1585,21 @@ dependencies = [ ++ ++ [[package]] ++ name = "slab" ++-version = "0.4.4" +++version = "0.4.5" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "c307a32c1c5c437f38c7fd45d753050587732ba8628319fbdf12a7e289ccc590" +++checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" ++ ++ [[package]] ++ name = "smallvec" ++-version = "1.6.1" +++version = "1.7.0" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" +++checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" ++ ++ [[package]] ++ name = "socket2" ++-version = "0.4.1" +++version = "0.4.2" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "765f090f0e423d2b55843402a07915add955e7d60657db13707a159727326cad" +++checksum = "5dc90fe6c7be1a323296982db1836d1ea9e47b6839496dde9a541bc496df3516" ++ dependencies = [ ++ "libc", ++ "winapi", ++@@ -1638,9 +1637,9 @@ checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c" ++ ++ [[package]] ++ name = "syn" ++-version = "1.0.76" +++version = "1.0.81" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "c6f107db402c2c2055242dbf4d2af0e69197202e9faacbef9571bbe47f5a1b84" +++checksum = "f2afee18b8beb5a596ecb4a2dce128c719b4ba399d34126b9e4396e3f9860966" ++ dependencies = [ ++ "proc-macro2", ++ "quote", ++@@ -1708,18 +1707,18 @@ dependencies = [ ++ ++ [[package]] ++ name = "thiserror" ++-version = "1.0.29" +++version = "1.0.30" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "602eca064b2d83369e2b2f34b09c70b605402801927c65c11071ac911d299b88" +++checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" ++ dependencies = [ ++ "thiserror-impl", ++ ] ++ ++ [[package]] ++ name = "thiserror-impl" ++-version = "1.0.29" +++version = "1.0.30" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "bad553cc2c78e8de258400763a647e80e6d1b31ee237275d756f6836d204494c" +++checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" ++ dependencies = [ ++ "proc-macro2", ++ "quote", ++@@ -1738,9 +1737,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "tinyvec" ++-version = "1.3.1" +++version = "1.5.0" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "848a1e1181b9f6753b5e96a092749e29b11d19ede67dfbbd6c7dc7e0f49b5338" +++checksum = "f83b2a3d4d9091d0abd7eba4dc2710b1718583bd4d8992e2190720ea38f391f7" ++ dependencies = [ ++ "tinyvec_macros", ++ ] ++@@ -1789,7 +1788,7 @@ dependencies = [ ++ "env_logger", ++ "itertools 0.9.0", ++ "libc", ++- "ndarray", +++ "ndarray 0.13.1", ++ "numpy", ++ "onig", ++ "pyo3", ++@@ -1802,9 +1801,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "tokio" ++-version = "1.11.0" +++version = "1.13.0" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "b4efe6fc2395938c8155973d7be49fe8d03a843726e285e100a8a383cc0154ce" +++checksum = "588b2d10a336da58d877567cd8fb8a14b463e2104910f8132cd054b4b96e29ee" ++ dependencies = [ ++ "autocfg", ++ "bytes", ++@@ -1828,9 +1827,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "tokio-util" ++-version = "0.6.8" +++version = "0.6.9" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = 
"08d3725d3efa29485e87311c5b699de63cde14b00ed4d256b8318aa30ca452cd" +++checksum = "9e99e1983e5d376cd8eb4b66604d2e99e79f5bd988c3055891dcd8c9e2604cc0" ++ dependencies = [ ++ "bytes", ++ "futures-core", ++@@ -1848,9 +1847,9 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" ++ ++ [[package]] ++ name = "tracing" ++-version = "0.1.26" +++version = "0.1.29" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "09adeb8c97449311ccd28a427f96fb563e7fd31aabf994189879d9da2394b89d" +++checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105" ++ dependencies = [ ++ "cfg-if 1.0.0", ++ "pin-project-lite", ++@@ -1859,9 +1858,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "tracing-core" ++-version = "0.1.19" +++version = "0.1.21" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "2ca517f43f0fb96e0c3072ed5c275fe5eece87e8cb52f4a77b69226d3b1c9df8" +++checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4" ++ dependencies = [ ++ "lazy_static", ++ ] ++@@ -1880,9 +1879,9 @@ checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" ++ ++ [[package]] ++ name = "unicode-bidi" ++-version = "0.3.6" +++version = "0.3.7" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "246f4c42e67e7a4e3c6106ff716a5d067d4132a642840b242e357e468a2a0085" +++checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f" ++ ++ [[package]] ++ name = "unicode-normalization" ++@@ -1910,9 +1909,9 @@ checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" ++ ++ [[package]] ++ name = "unicode-width" ++-version = "0.1.8" +++version = "0.1.9" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" +++checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" ++ ++ [[package]] ++ name = "unicode-xid" ++@@ -1986,21 +1985,19 @@ checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" ++ ++ [[package]] ++ name = "wasm-bindgen" ++-version = "0.2.76" +++version = "0.2.78" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "8ce9b1b516211d33767048e5d47fa2a381ed8b76fc48d2ce4aa39877f9f183e0" +++checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" ++ dependencies = [ ++ "cfg-if 1.0.0", ++- "serde", ++- "serde_json", ++ "wasm-bindgen-macro", ++ ] ++ ++ [[package]] ++ name = "wasm-bindgen-backend" ++-version = "0.2.76" +++version = "0.2.78" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "cfe8dc78e2326ba5f845f4b5bf548401604fa20b1dd1d365fb73b6c1d6364041" +++checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" ++ dependencies = [ ++ "bumpalo", ++ "lazy_static", ++@@ -2013,9 +2010,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "wasm-bindgen-futures" ++-version = "0.4.26" +++version = "0.4.28" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "95fded345a6559c2cfee778d562300c581f7d4ff3edb9b0d230d69800d213972" +++checksum = "8e8d7523cb1f2a4c96c1317ca690031b714a51cc14e05f712446691f413f5d39" ++ dependencies = [ ++ "cfg-if 1.0.0", ++ "js-sys", ++@@ -2025,9 +2022,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "wasm-bindgen-macro" ++-version = "0.2.76" +++version = "0.2.78" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = 
"44468aa53335841d9d6b6c023eaab07c0cd4bddbcfdee3e2bb1e8d2cb8069fef" +++checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" ++ dependencies = [ ++ "quote", ++ "wasm-bindgen-macro-support", ++@@ -2035,9 +2032,9 @@ dependencies = [ ++ ++ [[package]] ++ name = "wasm-bindgen-macro-support" ++-version = "0.2.76" +++version = "0.2.78" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "0195807922713af1e67dc66132c7328206ed9766af3858164fb583eedc25fbad" +++checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" ++ dependencies = [ ++ "proc-macro2", ++ "quote", ++@@ -2048,15 +2045,15 @@ dependencies = [ ++ ++ [[package]] ++ name = "wasm-bindgen-shared" ++-version = "0.2.76" +++version = "0.2.78" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "acdb075a845574a1fa5f09fd77e43f7747599301ea3417a9fbffdeedfc1f4a29" +++checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" ++ ++ [[package]] ++ name = "web-sys" ++-version = "0.3.53" +++version = "0.3.55" ++ source = "registry+https://github.com/rust-lang/crates.io-index" ++-checksum = "224b2f6b67919060055ef1a67807367c2066ed520c3862cc013d26cf893a783c" +++checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb" ++ dependencies = [ ++ "js-sys", ++ "wasm-bindgen", ++diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml ++index 7ac0dd9..9123be2 100644 ++--- a/bindings/python/Cargo.toml +++++ b/bindings/python/Cargo.toml ++@@ -14,8 +14,8 @@ serde = { version = "1.0", features = [ "rc", "derive" ]} ++ serde_json = "1.0" ++ libc = "0.2" ++ env_logger = "0.7.1" ++-pyo3 = "0.12" ++-numpy = "0.12" +++pyo3 = "0.15.0" +++numpy = "0.15.0" ++ ndarray = "0.13" ++ onig = { version = "6.0", default-features = false } ++ itertools = "0.9" ++@@ -29,9 +29,3 @@ tempfile = "3.1" ++ ++ [features] ++ default = ["pyo3/extension-module"] ++- ++-[target.x86_64-apple-darwin] ++-rustflags = [ ++- "-C", "link-arg=-undefined", ++- "-C", "link-arg=dynamic_lookup", ++-] ++diff --git a/bindings/python/src/decoders.rs b/bindings/python/src/decoders.rs ++index 5f15838..a5524dc 100644 ++--- a/bindings/python/src/decoders.rs +++++ b/bindings/python/src/decoders.rs ++@@ -21,7 +21,7 @@ use super::error::ToPyResult; ++ /// ++ /// This class is not supposed to be instantiated directly. Instead, any implementation of ++ /// a Decoder will return an instance of this class when instantiated. ++-#[pyclass(dict, module = "tokenizers.decoders", name=Decoder)] +++#[pyclass(dict, module = "tokenizers.decoders", name = "Decoder")] ++ #[derive(Clone, Deserialize, Serialize)] ++ pub struct PyDecoder { ++ #[serde(flatten)] ++@@ -97,7 +97,7 @@ impl PyDecoder { ++ /// ++ /// Returns: ++ /// :obj:`str`: The decoded string ++- #[text_signature = "(self, tokens)"] +++ #[pyo3(text_signature = "(self, tokens)")] ++ fn decode(&self, tokens: Vec) -> PyResult { ++ ToPyResult(self.decoder.decode(tokens)).into() ++ } ++@@ -141,8 +141,8 @@ macro_rules! setter { ++ /// ++ /// This decoder is to be used in tandem with the :class:`~tokenizers.pre_tokenizers.ByteLevel` ++ /// :class:`~tokenizers.pre_tokenizers.PreTokenizer`. 
++-#[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name=ByteLevel)] ++-#[text_signature = "(self)"] +++#[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name = "ByteLevel")] +++#[pyo3(text_signature = "(self)")] ++ pub struct PyByteLevelDec {} ++ #[pymethods] ++ impl PyByteLevelDec { ++@@ -161,8 +161,8 @@ impl PyByteLevelDec { ++ /// cleanup (:obj:`bool`, `optional`, defaults to :obj:`True`): ++ /// Whether to cleanup some tokenization artifacts. Mainly spaces before punctuation, ++ /// and some abbreviated english forms. ++-#[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name=WordPiece)] ++-#[text_signature = "(self, prefix=\"##\", cleanup=True)"] +++#[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name = "WordPiece")] +++#[pyo3(text_signature = "(self, prefix=\"##\", cleanup=True)")] ++ pub struct PyWordPieceDec {} ++ #[pymethods] ++ impl PyWordPieceDec { ++@@ -203,8 +203,8 @@ impl PyWordPieceDec { ++ /// add_prefix_space (:obj:`bool`, `optional`, defaults to :obj:`True`): ++ /// Whether to add a space to the first word if there isn't already one. This ++ /// lets us treat `hello` exactly like `say hello`. ++-#[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name=Metaspace)] ++-#[text_signature = "(self, replacement = \"▁\", add_prefix_space = True)"] +++#[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name = "Metaspace")] +++#[pyo3(text_signature = "(self, replacement = \"▁\", add_prefix_space = True)")] ++ pub struct PyMetaspaceDec {} ++ #[pymethods] ++ impl PyMetaspaceDec { ++@@ -244,8 +244,8 @@ impl PyMetaspaceDec { ++ /// suffix (:obj:`str`, `optional`, defaults to :obj:``): ++ /// The suffix that was used to caracterize an end-of-word. This suffix will ++ /// be replaced by whitespaces during the decoding ++-#[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name=BPEDecoder)] ++-#[text_signature = "(self, suffix=\"\")"] +++#[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name = "BPEDecoder")] +++#[pyo3(text_signature = "(self, suffix=\"\")")] ++ pub struct PyBPEDecoder {} ++ #[pymethods] ++ impl PyBPEDecoder { ++@@ -276,8 +276,8 @@ impl PyBPEDecoder { ++ /// cleanup (:obj:`bool`, `optional`, defaults to :obj:`True`): ++ /// Whether to cleanup some tokenization artifacts. ++ /// Mainly spaces before punctuation, and some abbreviated english forms. ++-#[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name=CTC)] ++-#[text_signature = "(self, pad_token=\"\", word_delimiter_token=\"|\", cleanup=True)"] +++#[pyclass(extends=PyDecoder, module = "tokenizers.decoders", name = "CTC")] +++#[pyo3(text_signature = "(self, pad_token=\"\", word_delimiter_token=\"|\", cleanup=True)")] ++ pub struct PyCTCDecoder {} ++ #[pymethods] ++ impl PyCTCDecoder { ++@@ -421,7 +421,7 @@ mod test { ++ let gil = Python::acquire_gil(); ++ assert_eq!( ++ "tokenizers.decoders.Metaspace", ++- py_meta.as_ref(gil.python()).get_type().name() +++ py_meta.as_ref(gil.python()).get_type().name().unwrap() ++ ); ++ } ++ ++diff --git a/bindings/python/src/encoding.rs b/bindings/python/src/encoding.rs ++index 976b2c6..c4e2743 100644 ++--- a/bindings/python/src/encoding.rs +++++ b/bindings/python/src/encoding.rs ++@@ -9,7 +9,7 @@ use tokenizers as tk; ++ use crate::error::{deprecation_warning, PyError}; ++ ++ /// The :class:`~tokenizers.Encoding` represents the output of a :class:`~tokenizers.Tokenizer`. 
++-#[pyclass(dict, module = "tokenizers", name=Encoding)] +++#[pyclass(dict, module = "tokenizers", name = "Encoding")] ++ #[repr(transparent)] ++ pub struct PyEncoding { ++ pub encoding: tk::tokenizer::Encoding, ++@@ -86,7 +86,7 @@ impl PyEncoding { ++ /// :class:`~tokenizers.Encoding`: The resulting Encoding ++ #[staticmethod] ++ #[args(growing_offsets = true)] ++- #[text_signature = "(encodings, growing_offsets=True)"] +++ #[pyo3(text_signature = "(encodings, growing_offsets=True)")] ++ fn merge(encodings: Vec>, growing_offsets: bool) -> PyEncoding { ++ tk::tokenizer::Encoding::merge( ++ encodings.into_iter().map(|e| e.encoding.clone()), ++@@ -108,7 +108,7 @@ impl PyEncoding { ++ /// ++ /// Set the given sequence index for the whole range of tokens contained in this ++ /// :class:`~tokenizers.Encoding`. ++- #[text_signature = "(self, sequence_id)"] +++ #[pyo3(text_signature = "(self, sequence_id)")] ++ fn set_sequence_id(&mut self, sequence_id: usize) { ++ self.encoding.set_sequence_id(sequence_id); ++ } ++@@ -270,7 +270,7 @@ impl PyEncoding { ++ /// Returns: ++ /// :obj:`Tuple[int, int]`: The range of tokens: :obj:`(first, last + 1)` ++ #[args(sequence_index = 0)] ++- #[text_signature = "(self, word_index, sequence_index=0)"] +++ #[pyo3(text_signature = "(self, word_index, sequence_index=0)")] ++ fn word_to_tokens(&self, word_index: u32, sequence_index: usize) -> Option<(usize, usize)> { ++ self.encoding.word_to_tokens(word_index, sequence_index) ++ } ++@@ -286,7 +286,7 @@ impl PyEncoding { ++ /// Returns: ++ /// :obj:`Tuple[int, int]`: The range of characters (span) :obj:`(first, last + 1)` ++ #[args(sequence_index = 0)] ++- #[text_signature = "(self, word_index, sequence_index=0)"] +++ #[pyo3(text_signature = "(self, word_index, sequence_index=0)")] ++ fn word_to_chars(&self, word_index: u32, sequence_index: usize) -> Option { ++ self.encoding.word_to_chars(word_index, sequence_index) ++ } ++@@ -302,7 +302,7 @@ impl PyEncoding { ++ /// ++ /// Returns: ++ /// :obj:`int`: The sequence id of the given token ++- #[text_signature = "(self, token_index)"] +++ #[pyo3(text_signature = "(self, token_index)")] ++ fn token_to_sequence(&self, token_index: usize) -> Option { ++ self.encoding.token_to_sequence(token_index) ++ } ++@@ -319,7 +319,7 @@ impl PyEncoding { ++ /// ++ /// Returns: ++ /// :obj:`Tuple[int, int]`: The token offsets :obj:`(first, last + 1)` ++- #[text_signature = "(self, token_index)"] +++ #[pyo3(text_signature = "(self, token_index)")] ++ fn token_to_chars(&self, token_index: usize) -> Option { ++ let (_, offsets) = self.encoding.token_to_chars(token_index)?; ++ Some(offsets) ++@@ -337,7 +337,7 @@ impl PyEncoding { ++ /// ++ /// Returns: ++ /// :obj:`int`: The index of the word in the relevant input sequence. 
++- #[text_signature = "(self, token_index)"] +++ #[pyo3(text_signature = "(self, token_index)")] ++ fn token_to_word(&self, token_index: usize) -> Option { ++ let (_, word_idx) = self.encoding.token_to_word(token_index)?; ++ Some(word_idx) ++@@ -354,7 +354,7 @@ impl PyEncoding { ++ /// Returns: ++ /// :obj:`int`: The index of the token that contains this char in the encoded sequence ++ #[args(sequence_index = 0)] ++- #[text_signature = "(self, char_pos, sequence_index=0)"] +++ #[pyo3(text_signature = "(self, char_pos, sequence_index=0)")] ++ fn char_to_token(&self, char_pos: usize, sequence_index: usize) -> Option { ++ self.encoding.char_to_token(char_pos, sequence_index) ++ } ++@@ -370,7 +370,7 @@ impl PyEncoding { ++ /// Returns: ++ /// :obj:`int`: The index of the word that contains this char in the input sequence ++ #[args(sequence_index = 0)] ++- #[text_signature = "(self, char_pos, sequence_index=0)"] +++ #[pyo3(text_signature = "(self, char_pos, sequence_index=0)")] ++ fn char_to_word(&self, char_pos: usize, sequence_index: usize) -> Option { ++ self.encoding.char_to_word(char_pos, sequence_index) ++ } ++@@ -393,7 +393,7 @@ impl PyEncoding { ++ /// pad_token (:obj:`str`, defaults to `[PAD]`): ++ /// The pad token to use ++ #[args(kwargs = "**")] ++- #[text_signature = "(self, length, direction='right', pad_id=0, pad_type_id=0, pad_token='[PAD]')"] +++ #[pyo3(text_signature = "(self, length, direction='right', pad_id=0, pad_type_id=0, pad_token='[PAD]')")] ++ fn pad(&mut self, length: usize, kwargs: Option<&PyDict>) -> PyResult<()> { ++ let mut pad_id = 0; ++ let mut pad_type_id = 0; ++@@ -445,7 +445,7 @@ impl PyEncoding { ++ /// Truncate direction ++ #[args(stride = "0")] ++ #[args(direction = "\"right\"")] ++- #[text_signature = "(self, max_length, stride=0, direction='right')"] +++ #[pyo3(text_signature = "(self, max_length, stride=0, direction='right')")] ++ fn truncate(&mut self, max_length: usize, stride: usize, direction: &str) -> PyResult<()> { ++ let tdir = match direction { ++ "left" => Ok(TruncationDirection::Left), ++diff --git a/bindings/python/src/error.rs b/bindings/python/src/error.rs ++index a6bcaf3..1e8c5a1 100644 ++--- a/bindings/python/src/error.rs +++++ b/bindings/python/src/error.rs ++@@ -37,7 +37,7 @@ impl ToPyResult { ++ pub(crate) fn deprecation_warning(version: &str, message: &str) -> PyResult<()> { ++ let gil = pyo3::Python::acquire_gil(); ++ let python = gil.python(); ++- let deprecation_warning = python.import("builtins")?.get("DeprecationWarning")?; +++ let deprecation_warning = python.import("builtins")?.getattr("DeprecationWarning")?; ++ let full_message = format!("Deprecated in {}: {}", version, message); ++ pyo3::PyErr::warn(python, deprecation_warning, &full_message, 0) ++ } ++diff --git a/bindings/python/src/models.rs b/bindings/python/src/models.rs ++index e7aaa79..779d66e 100644 ++--- a/bindings/python/src/models.rs +++++ b/bindings/python/src/models.rs ++@@ -24,7 +24,7 @@ use super::error::{deprecation_warning, ToPyResult}; ++ /// will contain and manage the learned vocabulary. ++ /// ++ /// This class cannot be constructed directly. Please use one of the concrete models. 
++-#[pyclass(module = "tokenizers.models", name=Model)] +++#[pyclass(module = "tokenizers.models", name = "Model")] ++ #[derive(Clone, Serialize, Deserialize)] ++ pub struct PyModel { ++ #[serde(flatten)] ++@@ -132,7 +132,7 @@ impl PyModel { ++ /// ++ /// Returns: ++ /// A :obj:`List` of :class:`~tokenizers.Token`: The generated tokens ++- #[text_signature = "(self, sequence)"] +++ #[pyo3(text_signature = "(self, sequence)")] ++ fn tokenize(&self, sequence: &str) -> PyResult> { ++ Ok(ToPyResult(self.model.read().unwrap().tokenize(sequence)) ++ .into_py()? ++@@ -149,7 +149,7 @@ impl PyModel { ++ /// ++ /// Returns: ++ /// :obj:`int`: The ID associated to the token ++- #[text_signature = "(self, tokens)"] +++ #[pyo3(text_signature = "(self, tokens)")] ++ fn token_to_id(&self, token: &str) -> Option { ++ self.model.read().unwrap().token_to_id(token) ++ } ++@@ -162,7 +162,7 @@ impl PyModel { ++ /// ++ /// Returns: ++ /// :obj:`str`: The token associated to the ID ++- #[text_signature = "(self, id)"] +++ #[pyo3(text_signature = "(self, id)")] ++ fn id_to_token(&self, id: u32) -> Option { ++ self.model.read().unwrap().id_to_token(id) ++ } ++@@ -182,7 +182,7 @@ impl PyModel { ++ /// ++ /// Returns: ++ /// :obj:`List[str]`: The list of saved files ++- #[text_signature = "(self, folder, prefix)"] +++ #[pyo3(text_signature = "(self, folder, prefix)")] ++ fn save<'a>( ++ &self, ++ folder: &str, ++@@ -248,8 +248,8 @@ impl PyModel { ++ /// ++ /// fuse_unk (:obj:`bool`, `optional`): ++ /// Whether to fuse any subsequent unknown tokens into a single one ++-#[pyclass(extends=PyModel, module = "tokenizers.models", name=BPE)] ++-#[text_signature = "(self, vocab=None, merges=None, cache_capacity=None, dropout=None, unk_token=None, continuing_subword_prefix=None, end_of_word_suffix=None, fuse_unk=None)"] +++#[pyclass(extends=PyModel, module = "tokenizers.models", name = "BPE")] +++#[pyo3(text_signature = "(self, vocab=None, merges=None, cache_capacity=None, dropout=None, unk_token=None, continuing_subword_prefix=None, end_of_word_suffix=None, fuse_unk=None)")] ++ pub struct PyBPE {} ++ ++ impl PyBPE { ++@@ -437,7 +437,7 @@ impl PyBPE { ++ /// A :obj:`Tuple` with the vocab and the merges: ++ /// The vocabulary and merges loaded into memory ++ #[staticmethod] ++- #[text_signature = "(self, vocab, merges)"] +++ #[pyo3(text_signature = "(self, vocab, merges)")] ++ fn read_file(vocab: &str, merges: &str) -> PyResult<(Vocab, Merges)> { ++ BPE::read_file(vocab, merges).map_err(|e| { ++ exceptions::PyException::new_err(format!( ++@@ -469,7 +469,7 @@ impl PyBPE { ++ /// :class:`~tokenizers.models.BPE`: An instance of BPE loaded from these files ++ #[classmethod] ++ #[args(kwargs = "**")] ++- #[text_signature = "(cls, vocab, merge, **kwargs)"] +++ #[pyo3(text_signature = "(cls, vocab, merge, **kwargs)")] ++ fn from_file( ++ _cls: &PyType, ++ py: Python, ++@@ -502,8 +502,8 @@ impl PyBPE { ++ /// ++ /// max_input_chars_per_word (:obj:`int`, `optional`): ++ /// The maximum number of characters to authorize in a single word. 
++-#[pyclass(extends=PyModel, module = "tokenizers.models", name=WordPiece)] ++-#[text_signature = "(self, vocab, unk_token, max_input_chars_per_word)"] +++#[pyclass(extends=PyModel, module = "tokenizers.models", name = "WordPiece")] +++#[pyo3(text_signature = "(self, vocab, unk_token, max_input_chars_per_word)")] ++ pub struct PyWordPiece {} ++ ++ impl PyWordPiece { ++@@ -613,7 +613,7 @@ impl PyWordPiece { ++ /// Returns: ++ /// :obj:`Dict[str, int]`: The vocabulary as a :obj:`dict` ++ #[staticmethod] ++- #[text_signature = "(vocab)"] +++ #[pyo3(text_signature = "(vocab)")] ++ fn read_file(vocab: &str) -> PyResult { ++ WordPiece::read_file(vocab).map_err(|e| { ++ exceptions::PyException::new_err(format!("Error while reading WordPiece file: {}", e)) ++@@ -639,7 +639,7 @@ impl PyWordPiece { ++ /// :class:`~tokenizers.models.WordPiece`: An instance of WordPiece loaded from file ++ #[classmethod] ++ #[args(kwargs = "**")] ++- #[text_signature = "(vocab, **kwargs)"] +++ #[pyo3(text_signature = "(vocab, **kwargs)")] ++ fn from_file( ++ _cls: &PyType, ++ py: Python, ++@@ -663,8 +663,8 @@ impl PyWordPiece { ++ /// ++ /// unk_token (:obj:`str`, `optional`): ++ /// The unknown token to be used by the model. ++-#[pyclass(extends=PyModel, module = "tokenizers.models", name=WordLevel)] ++-#[text_signature = "(self, vocab, unk_token)"] +++#[pyclass(extends=PyModel, module = "tokenizers.models", name = "WordLevel")] +++#[pyo3(text_signature = "(self, vocab, unk_token)")] ++ pub struct PyWordLevel {} ++ ++ #[pymethods] ++@@ -725,7 +725,7 @@ impl PyWordLevel { ++ /// Returns: ++ /// :obj:`Dict[str, int]`: The vocabulary as a :obj:`dict` ++ #[staticmethod] ++- #[text_signature = "(vocab)"] +++ #[pyo3(text_signature = "(vocab)")] ++ fn read_file(vocab: &str) -> PyResult { ++ WordLevel::read_file(vocab).map_err(|e| { ++ exceptions::PyException::new_err(format!("Error while reading WordLevel file: {}", e)) ++@@ -751,7 +751,7 @@ impl PyWordLevel { ++ /// :class:`~tokenizers.models.WordLevel`: An instance of WordLevel loaded from file ++ #[classmethod] ++ #[args(unk_token = "None")] ++- #[text_signature = "(vocab, unk_token)"] +++ #[pyo3(text_signature = "(vocab, unk_token)")] ++ fn from_file( ++ _cls: &PyType, ++ py: Python, ++@@ -773,8 +773,8 @@ impl PyWordLevel { ++ /// Args: ++ /// vocab (:obj:`List[Tuple[str, float]]`, `optional`): ++ /// A list of vocabulary items and their relative score [("am", -0.2442),...] ++-#[pyclass(extends=PyModel, module = "tokenizers.models", name=Unigram)] ++-#[text_signature = "(self, vocab)"] +++#[pyclass(extends=PyModel, module = "tokenizers.models", name = "Unigram")] +++#[pyo3(text_signature = "(self, vocab)")] ++ pub struct PyUnigram {} ++ ++ #[pymethods] ++@@ -810,7 +810,7 @@ mod test { ++ let gil = Python::acquire_gil(); ++ assert_eq!( ++ "tokenizers.models.BPE", ++- py_bpe.as_ref(gil.python()).get_type().name() +++ py_bpe.as_ref(gil.python()).get_type().name().unwrap() ++ ); ++ } ++ ++diff --git a/bindings/python/src/normalizers.rs b/bindings/python/src/normalizers.rs ++index fb1a5cc..59f9c64 100644 ++--- a/bindings/python/src/normalizers.rs +++++ b/bindings/python/src/normalizers.rs ++@@ -43,7 +43,7 @@ impl PyNormalizedStringMut<'_> { ++ /// ++ /// This class is not supposed to be instantiated directly. Instead, any implementation of a ++ /// Normalizer will return an instance of this class when instantiated. 
++-#[pyclass(dict, module = "tokenizers.normalizers", name=Normalizer)] +++#[pyclass(dict, module = "tokenizers.normalizers", name = "Normalizer")] ++ #[derive(Clone, Serialize, Deserialize)] ++ pub struct PyNormalizer { ++ #[serde(flatten)] ++@@ -144,7 +144,7 @@ impl PyNormalizer { ++ /// normalized (:class:`~tokenizers.NormalizedString`): ++ /// The normalized string on which to apply this ++ /// :class:`~tokenizers.normalizers.Normalizer` ++- #[text_signature = "(self, normalized)"] +++ #[pyo3(text_signature = "(self, normalized)")] ++ fn normalize(&self, mut normalized: PyNormalizedStringMut) -> PyResult<()> { ++ normalized.normalize_with(&self.normalizer) ++ } ++@@ -162,7 +162,7 @@ impl PyNormalizer { ++ /// ++ /// Returns: ++ /// :obj:`str`: A string after normalization ++- #[text_signature = "(self, sequence)"] +++ #[pyo3(text_signature = "(self, sequence)")] ++ fn normalize_str(&self, sequence: &str) -> PyResult { ++ let mut normalized = NormalizedString::from(sequence); ++ ToPyResult(self.normalizer.normalize(&mut normalized)).into_py()?; ++@@ -217,8 +217,8 @@ macro_rules! setter { ++ /// ++ /// lowercase (:obj:`bool`, `optional`, defaults to :obj:`True`): ++ /// Whether to lowercase. ++-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=BertNormalizer)] ++-#[text_signature = "(self, clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True)"] +++#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "BertNormalizer")] +++#[pyo3(text_signature = "(self, clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True)")] ++ pub struct PyBertNormalizer {} ++ #[pymethods] ++ impl PyBertNormalizer { ++@@ -287,8 +287,8 @@ impl PyBertNormalizer { ++ } ++ ++ /// NFD Unicode Normalizer ++-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=NFD)] ++-#[text_signature = "(self)"] +++#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "NFD")] +++#[pyo3(text_signature = "(self)")] ++ pub struct PyNFD {} ++ #[pymethods] ++ impl PyNFD { ++@@ -299,8 +299,8 @@ impl PyNFD { ++ } ++ ++ /// NFKD Unicode Normalizer ++-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=NFKD)] ++-#[text_signature = "(self)"] +++#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "NFKD")] +++#[pyo3(text_signature = "(self)")] ++ pub struct PyNFKD {} ++ #[pymethods] ++ impl PyNFKD { ++@@ -311,8 +311,8 @@ impl PyNFKD { ++ } ++ ++ /// NFC Unicode Normalizer ++-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=NFC)] ++-#[text_signature = "(self)"] +++#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "NFC")] +++#[pyo3(text_signature = "(self)")] ++ pub struct PyNFC {} ++ #[pymethods] ++ impl PyNFC { ++@@ -323,8 +323,8 @@ impl PyNFC { ++ } ++ ++ /// NFKC Unicode Normalizer ++-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=NFKC)] ++-#[text_signature = "(self)"] +++#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "NFKC")] +++#[pyo3(text_signature = "(self)")] ++ pub struct PyNFKC {} ++ #[pymethods] ++ impl PyNFKC { ++@@ -340,7 +340,7 @@ impl PyNFKC { ++ /// Args: ++ /// normalizers (:obj:`List[Normalizer]`): ++ /// A list of Normalizer to be run as a sequence ++-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=Sequence)] +++#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Sequence")] ++ pub struct PySequence {} ++ #[pymethods] 
++ impl PySequence { ++@@ -373,8 +373,8 @@ impl PySequenceProtocol for PySequence { ++ } ++ ++ /// Lowercase Normalizer ++-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=Lowercase)] ++-#[text_signature = "(self)"] +++#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Lowercase")] +++#[pyo3(text_signature = "(self)")] ++ pub struct PyLowercase {} ++ #[pymethods] ++ impl PyLowercase { ++@@ -385,8 +385,8 @@ impl PyLowercase { ++ } ++ ++ /// Strip normalizer ++-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=Strip)] ++-#[text_signature = "(self, left=True, right=True)"] +++#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Strip")] +++#[pyo3(text_signature = "(self, left=True, right=True)")] ++ pub struct PyStrip {} ++ #[pymethods] ++ impl PyStrip { ++@@ -418,8 +418,8 @@ impl PyStrip { ++ } ++ ++ /// StripAccents normalizer ++-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=StripAccents)] ++-#[text_signature = "(self)"] +++#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "StripAccents")] +++#[pyo3(text_signature = "(self)")] ++ pub struct PyStripAccents {} ++ #[pymethods] ++ impl PyStripAccents { ++@@ -430,8 +430,8 @@ impl PyStripAccents { ++ } ++ ++ /// Nmt normalizer ++-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=Nmt)] ++-#[text_signature = "(self)"] +++#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Nmt")] +++#[pyo3(text_signature = "(self)")] ++ pub struct PyNmt {} ++ #[pymethods] ++ impl PyNmt { ++@@ -443,8 +443,8 @@ impl PyNmt { ++ ++ /// Precompiled normalizer ++ /// Don't use manually it is used for compatiblity for SentencePiece. ++-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=Precompiled)] ++-#[text_signature = "(self, precompiled_charsmap)"] +++#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Precompiled")] +++#[pyo3(text_signature = "(self, precompiled_charsmap)")] ++ pub struct PyPrecompiled {} ++ #[pymethods] ++ impl PyPrecompiled { ++@@ -466,8 +466,8 @@ impl PyPrecompiled { ++ } ++ ++ /// Replace normalizer ++-#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name=Replace)] ++-#[text_signature = "(self, pattern, content)"] +++#[pyclass(extends=PyNormalizer, module = "tokenizers.normalizers", name = "Replace")] +++#[pyo3(text_signature = "(self, pattern, content)")] ++ pub struct PyReplace {} ++ #[pymethods] ++ impl PyReplace { ++@@ -631,7 +631,7 @@ mod test { ++ let gil = Python::acquire_gil(); ++ assert_eq!( ++ "tokenizers.normalizers.NFC", ++- py_nfc.as_ref(gil.python()).get_type().name() +++ py_nfc.as_ref(gil.python()).get_type().name().unwrap() ++ ); ++ } ++ ++diff --git a/bindings/python/src/pre_tokenizers.rs b/bindings/python/src/pre_tokenizers.rs ++index 947e267..3d8ab58 100644 ++--- a/bindings/python/src/pre_tokenizers.rs +++++ b/bindings/python/src/pre_tokenizers.rs ++@@ -28,7 +28,7 @@ use super::utils::*; ++ /// ++ /// This class is not supposed to be instantiated directly. Instead, any implementation of a ++ /// PreTokenizer will return an instance of this class when instantiated. 
++-#[pyclass(dict, module = "tokenizers.pre_tokenizers", name=PreTokenizer)] +++#[pyclass(dict, module = "tokenizers.pre_tokenizers", name = "PreTokenizer")] ++ #[derive(Clone, Serialize, Deserialize)] ++ pub struct PyPreTokenizer { ++ #[serde(flatten)] ++@@ -146,7 +146,7 @@ impl PyPreTokenizer { ++ /// pretok (:class:`~tokenizers.PreTokenizedString): ++ /// The pre-tokenized string on which to apply this ++ /// :class:`~tokenizers.pre_tokenizers.PreTokenizer` ++- #[text_signature = "(self, pretok)"] +++ #[pyo3(text_signature = "(self, pretok)")] ++ fn pre_tokenize(&self, pretok: &mut PyPreTokenizedString) -> PyResult<()> { ++ ToPyResult(self.pretok.pre_tokenize(&mut pretok.pretok)).into() ++ } ++@@ -166,7 +166,7 @@ impl PyPreTokenizer { ++ /// Returns: ++ /// :obj:`List[Tuple[str, Offsets]]`: ++ /// A list of tuple with the pre-tokenized parts and their offsets ++- #[text_signature = "(self, sequence)"] +++ #[pyo3(text_signature = "(self, sequence)")] ++ fn pre_tokenize_str(&self, s: &str) -> PyResult> { ++ let mut pretokenized = tk::tokenizer::PreTokenizedString::from(s); ++ ++@@ -228,8 +228,8 @@ macro_rules! setter { ++ /// add_prefix_space (:obj:`bool`, `optional`, defaults to :obj:`True`): ++ /// Whether to add a space to the first word if there isn't already one. This ++ /// lets us treat `hello` exactly like `say hello`. ++-#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=ByteLevel)] ++-#[text_signature = "(self, add_prefix_space=True)"] +++#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "ByteLevel")] +++#[pyo3(text_signature = "(self, add_prefix_space=True)")] ++ pub struct PyByteLevel {} ++ #[pymethods] ++ impl PyByteLevel { ++@@ -263,7 +263,7 @@ impl PyByteLevel { ++ /// Returns: ++ /// :obj:`List[str]`: A list of characters that compose the alphabet ++ #[staticmethod] ++- #[text_signature = "()"] +++ #[pyo3(text_signature = "()")] ++ fn alphabet() -> Vec { ++ ByteLevel::alphabet() ++ .into_iter() ++@@ -273,8 +273,8 @@ impl PyByteLevel { ++ } ++ ++ /// This pre-tokenizer simply splits using the following regex: `\w+|[^\w\s]+` ++-#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=Whitespace)] ++-#[text_signature = "(self)"] +++#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "Whitespace")] +++#[pyo3(text_signature = "(self)")] ++ pub struct PyWhitespace {} ++ #[pymethods] ++ impl PyWhitespace { ++@@ -285,8 +285,8 @@ impl PyWhitespace { ++ } ++ ++ /// This pre-tokenizer simply splits on the whitespace. Works like `.split()` ++-#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=WhitespaceSplit)] ++-#[text_signature = "(self)"] +++#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "WhitespaceSplit")] +++#[pyo3(text_signature = "(self)")] ++ pub struct PyWhitespaceSplit {} ++ #[pymethods] ++ impl PyWhitespaceSplit { ++@@ -313,8 +313,8 @@ impl PyWhitespaceSplit { ++ /// ++ /// invert (:obj:`bool`, `optional`, defaults to :obj:`False`): ++ /// Whether to invert the pattern. 
++-#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=Split)] ++-#[text_signature = "(self, pattern, behavior, invert=False)"] +++#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "Split")] +++#[pyo3(text_signature = "(self, pattern, behavior, invert=False)")] ++ pub struct PySplit {} ++ #[pymethods] ++ impl PySplit { ++@@ -343,7 +343,7 @@ impl PySplit { ++ /// Args: ++ /// delimiter: str: ++ /// The delimiter char that will be used to split input ++-#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=CharDelimiterSplit)] +++#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "CharDelimiterSplit")] ++ pub struct PyCharDelimiterSplit {} ++ #[pymethods] ++ impl PyCharDelimiterSplit { ++@@ -374,8 +374,8 @@ impl PyCharDelimiterSplit { ++ /// ++ /// This pre-tokenizer splits tokens on spaces, and also on punctuation. ++ /// Each occurence of a punctuation character will be treated separately. ++-#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=BertPreTokenizer)] ++-#[text_signature = "(self)"] +++#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "BertPreTokenizer")] +++#[pyo3(text_signature = "(self)")] ++ pub struct PyBertPreTokenizer {} ++ #[pymethods] ++ impl PyBertPreTokenizer { ++@@ -392,8 +392,8 @@ impl PyBertPreTokenizer { ++ /// The behavior to use when splitting. ++ /// Choices: "removed", "isolated" (default), "merged_with_previous", "merged_with_next", ++ /// "contiguous" ++-#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=Punctuation)] ++-#[text_signature = "(self, behavior=\"isolated\")"] +++#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "Punctuation")] +++#[pyo3(text_signature = "(self, behavior=\"isolated\")")] ++ pub struct PyPunctuation {} ++ #[pymethods] ++ impl PyPunctuation { ++@@ -405,8 +405,8 @@ impl PyPunctuation { ++ } ++ ++ /// This pre-tokenizer composes other pre_tokenizers and applies them in sequence ++-#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=Sequence)] ++-#[text_signature = "(self, pretokenizers)"] +++#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "Sequence")] +++#[pyo3(text_signature = "(self, pretokenizers)")] ++ pub struct PySequence {} ++ #[pymethods] ++ impl PySequence { ++@@ -446,8 +446,8 @@ impl PySequence { ++ /// add_prefix_space (:obj:`bool`, `optional`, defaults to :obj:`True`): ++ /// Whether to add a space to the first word if there isn't already one. This ++ /// lets us treat `hello` exactly like `say hello`. 
++-#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=Metaspace)] ++-#[text_signature = "(self, replacement=\"_\", add_prefix_space=True)"] +++#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "Metaspace")] +++#[pyo3(text_signature = "(self, replacement=\"_\", add_prefix_space=True)")] ++ pub struct PyMetaspace {} ++ #[pymethods] ++ impl PyMetaspace { ++@@ -496,8 +496,8 @@ impl PyMetaspace { ++ /// If set to False, digits will grouped as follows:: ++ /// ++ /// "Call 123 please" -> "Call ", "123", " please" ++-#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=Digits)] ++-#[text_signature = "(self, individual_digits=False)"] +++#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "Digits")] +++#[pyo3(text_signature = "(self, individual_digits=False)")] ++ pub struct PyDigits {} ++ #[pymethods] ++ impl PyDigits { ++@@ -522,8 +522,8 @@ impl PyDigits { ++ /// It roughly follows https://github.com/google/sentencepiece/blob/master/data/Scripts.txt ++ /// Actually Hiragana and Katakana are fused with Han, and 0x30FC is Han too. ++ /// This mimicks SentencePiece Unigram implementation. ++-#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name=UnicodeScripts)] ++-#[text_signature = "(self)"] +++#[pyclass(extends=PyPreTokenizer, module = "tokenizers.pre_tokenizers", name = "UnicodeScripts")] +++#[pyo3(text_signature = "(self)")] ++ pub struct PyUnicodeScripts {} ++ #[pymethods] ++ impl PyUnicodeScripts { ++@@ -687,7 +687,7 @@ mod test { ++ let gil = Python::acquire_gil(); ++ assert_eq!( ++ "tokenizers.pre_tokenizers.Whitespace", ++- py_wsp.as_ref(gil.python()).get_type().name() +++ py_wsp.as_ref(gil.python()).get_type().name().unwrap() ++ ); ++ } ++ ++diff --git a/bindings/python/src/processors.rs b/bindings/python/src/processors.rs ++index 12990b3..4fd7c50 100644 ++--- a/bindings/python/src/processors.rs +++++ b/bindings/python/src/processors.rs ++@@ -20,7 +20,7 @@ use tokenizers as tk; ++ /// ++ /// This class is not supposed to be instantiated directly. Instead, any implementation of ++ /// a PostProcessor will return an instance of this class when instantiated. 
++-#[pyclass(dict, module = "tokenizers.processors", name=PostProcessor)] +++#[pyclass(dict, module = "tokenizers.processors", name = "PostProcessor")] ++ #[derive(Clone, Deserialize, Serialize)] ++ pub struct PyPostProcessor { ++ #[serde(flatten)] ++@@ -100,7 +100,7 @@ impl PyPostProcessor { ++ /// ++ /// Returns: ++ /// :obj:`int`: The number of tokens to add ++- #[text_signature = "(self, is_pair)"] +++ #[pyo3(text_signature = "(self, is_pair)")] ++ fn num_special_tokens_to_add(&self, is_pair: bool) -> usize { ++ self.processor.added_tokens(is_pair) ++ } ++@@ -120,7 +120,7 @@ impl PyPostProcessor { ++ /// Return: ++ /// :class:`~tokenizers.Encoding`: The final encoding ++ #[args(pair = "None", add_special_tokens = "true")] ++- #[text_signature = "(self, encoding, pair=None, add_special_tokens=True)"] +++ #[pyo3(text_signature = "(self, encoding, pair=None, add_special_tokens=True)")] ++ fn process( ++ &self, ++ encoding: &PyEncoding, ++@@ -149,8 +149,8 @@ impl PyPostProcessor { ++ /// ++ /// cls (:obj:`Tuple[str, int]`): ++ /// A tuple with the string representation of the CLS token, and its id ++-#[pyclass(extends=PyPostProcessor, module = "tokenizers.processors", name=BertProcessing)] ++-#[text_signature = "(self, sep, cls)"] +++#[pyclass(extends=PyPostProcessor, module = "tokenizers.processors", name = "BertProcessing")] +++#[pyo3(text_signature = "(self, sep, cls)")] ++ pub struct PyBertProcessing {} ++ #[pymethods] ++ impl PyBertProcessing { ++@@ -191,8 +191,8 @@ impl PyBertProcessing { ++ /// add_prefix_space (:obj:`bool`, `optional`, defaults to :obj:`True`): ++ /// Whether the add_prefix_space option was enabled during pre-tokenization. This ++ /// is relevant because it defines the way the offsets are trimmed out. ++-#[pyclass(extends=PyPostProcessor, module = "tokenizers.processors", name=RobertaProcessing)] ++-#[text_signature = "(self, sep, cls, trim_offsets=True, add_prefix_space=True)"] +++#[pyclass(extends=PyPostProcessor, module = "tokenizers.processors", name = "RobertaProcessing")] +++#[pyo3(text_signature = "(self, sep, cls, trim_offsets=True, add_prefix_space=True)")] ++ pub struct PyRobertaProcessing {} ++ #[pymethods] ++ impl PyRobertaProcessing { ++@@ -226,8 +226,8 @@ impl PyRobertaProcessing { ++ /// Args: ++ /// trim_offsets (:obj:`bool`): ++ /// Whether to trim the whitespaces from the produced offsets. ++-#[pyclass(extends=PyPostProcessor, module = "tokenizers.processors", name=ByteLevel)] ++-#[text_signature = "(self, trim_offsets=True)"] +++#[pyclass(extends=PyPostProcessor, module = "tokenizers.processors", name = "ByteLevel")] +++#[pyo3(text_signature = "(self, trim_offsets=True)")] ++ pub struct PyByteLevel {} ++ #[pymethods] ++ impl PyByteLevel { ++@@ -378,8 +378,8 @@ impl FromPyObject<'_> for PyTemplate { ++ /// ++ /// The given dict expects the provided :obj:`ids` and :obj:`tokens` lists to have ++ /// the same length. 
++-#[pyclass(extends=PyPostProcessor, module = "tokenizers.processors", name=TemplateProcessing)] ++-#[text_signature = "(self, single, pair, special_tokens)"] +++#[pyclass(extends=PyPostProcessor, module = "tokenizers.processors", name = "TemplateProcessing")] +++#[pyo3(text_signature = "(self, single, pair, special_tokens)")] ++ pub struct PyTemplateProcessing {} ++ #[pymethods] ++ impl PyTemplateProcessing { ++@@ -429,7 +429,7 @@ mod test { ++ let gil = Python::acquire_gil(); ++ assert_eq!( ++ "tokenizers.processors.BertProcessing", ++- py_bert.as_ref(gil.python()).get_type().name() +++ py_bert.as_ref(gil.python()).get_type().name().unwrap() ++ ); ++ } ++ ++diff --git a/bindings/python/src/token.rs b/bindings/python/src/token.rs ++index eb2a472..f1db997 100644 ++--- a/bindings/python/src/token.rs +++++ b/bindings/python/src/token.rs ++@@ -1,7 +1,7 @@ ++ use pyo3::prelude::*; ++ use tk::Token; ++ ++-#[pyclass(module = "tokenizers", name=Token)] +++#[pyclass(module = "tokenizers", name = "Token")] ++ #[derive(Clone)] ++ pub struct PyToken { ++ token: Token, ++diff --git a/bindings/python/src/tokenizer.rs b/bindings/python/src/tokenizer.rs ++index 89073a4..cfa7358 100644 ++--- a/bindings/python/src/tokenizer.rs +++++ b/bindings/python/src/tokenizer.rs ++@@ -55,8 +55,8 @@ use crate::utils::{MaybeSizedIterator, PyBufferedIterator}; ++ /// lowercasing the text, the token could be extract from the input ``"I saw a lion ++ /// Yesterday"``. ++ /// ++-#[pyclass(dict, module = "tokenizers", name=AddedToken)] ++-#[text_signature = "(self, content, single_word=False, lstrip=False, rstrip=False, normalized=True)"] +++#[pyclass(dict, module = "tokenizers", name = "AddedToken")] +++#[pyo3(text_signature = "(self, content, single_word=False, lstrip=False, rstrip=False, normalized=True)")] ++ pub struct PyAddedToken { ++ pub content: String, ++ pub is_special_token: bool, ++@@ -285,6 +285,7 @@ impl FromPyObject<'_> for PyArrayUnicode { ++ let seq = (0..n_elem) ++ .map(|i| { ++ let bytes = &all_bytes[i * elsize..(i + 1) * elsize]; +++ #[allow(deprecated)] ++ let unicode = pyo3::ffi::PyUnicode_FromUnicode( ++ bytes.as_ptr() as *const _, ++ elsize as isize / alignment as isize, ++@@ -438,8 +439,8 @@ type Tokenizer = TokenizerImpl PyResult { ++ let tokenizer: PyResult<_> = ToPyResult(json.parse()).into(); ++ Ok(Self::new(tokenizer?)) ++@@ -518,7 +519,7 @@ impl PyTokenizer { ++ /// Returns: ++ /// :class:`~tokenizers.Tokenizer`: The new tokenizer ++ #[staticmethod] ++- #[text_signature = "(path)"] +++ #[pyo3(text_signature = "(path)")] ++ fn from_file(path: &str) -> PyResult { ++ let tokenizer: PyResult<_> = ToPyResult(Tokenizer::from_file(path)).into(); ++ Ok(Self::new(tokenizer?)) ++@@ -533,7 +534,7 @@ impl PyTokenizer { ++ /// Returns: ++ /// :class:`~tokenizers.Tokenizer`: The new tokenizer ++ #[staticmethod] ++- #[text_signature = "(buffer)"] +++ #[pyo3(text_signature = "(buffer)")] ++ fn from_buffer(buffer: &PyBytes) -> PyResult { ++ let tokenizer = serde_json::from_slice(buffer.as_bytes()).map_err(|e| { ++ exceptions::PyValueError::new_err(format!( ++@@ -561,7 +562,7 @@ impl PyTokenizer { ++ /// :class:`~tokenizers.Tokenizer`: The new tokenizer ++ #[staticmethod] ++ #[args(revision = "String::from(\"main\")", auth_token = "None")] ++- #[text_signature = "(identifier, revision=\"main\", auth_token=None)"] +++ #[pyo3(text_signature = "(identifier, revision=\"main\", auth_token=None)")] ++ fn from_pretrained( ++ identifier: &str, ++ revision: String, ++@@ -590,7 +591,7 @@ impl PyTokenizer { ++ /// 
++ /// :obj:`str`: A string representing the serialized Tokenizer
++ #[args(pretty = false)]
++- #[text_signature = "(self, pretty=False)"]
+++ #[pyo3(text_signature = "(self, pretty=False)")]
++ fn to_str(&self, pretty: bool) -> PyResult {
++ ToPyResult(self.tokenizer.to_string(pretty)).into()
++ }
++@@ -604,7 +605,7 @@ impl PyTokenizer {
++ /// pretty (:obj:`bool`, defaults to :obj:`True`):
++ /// Whether the JSON file should be pretty formatted.
++ #[args(pretty = true)]
++- #[text_signature = "(self, path, pretty=True)"]
+++ #[pyo3(text_signature = "(self, path, pretty=True)")]
++ fn save(&self, path: &str, pretty: bool) -> PyResult<()> {
++ ToPyResult(self.tokenizer.save(path, pretty)).into()
++ }
++@@ -612,7 +613,7 @@ impl PyTokenizer {
++ /// Return the number of special tokens that would be added for single/pair sentences.
++ /// :param is_pair: Boolean indicating if the input would be a single sentence or a pair
++ /// :return:
++- #[text_signature = "(self, is_pair)"]
+++ #[pyo3(text_signature = "(self, is_pair)")]
++ fn num_special_tokens_to_add(&self, is_pair: bool) -> usize {
++ self.tokenizer
++ .get_post_processor()
++@@ -628,7 +629,7 @@ impl PyTokenizer {
++ /// Returns:
++ /// :obj:`Dict[str, int]`: The vocabulary
++ #[args(with_added_tokens = true)]
++- #[text_signature = "(self, with_added_tokens=True)"]
+++ #[pyo3(text_signature = "(self, with_added_tokens=True)")]
++ fn get_vocab(&self, with_added_tokens: bool) -> HashMap {
++ self.tokenizer.get_vocab(with_added_tokens)
++ }
++@@ -642,7 +643,7 @@ impl PyTokenizer {
++ /// Returns:
++ /// :obj:`int`: The size of the vocabulary
++ #[args(with_added_tokens = true)]
++- #[text_signature = "(self, with_added_tokens=True)"]
+++ #[pyo3(text_signature = "(self, with_added_tokens=True)")]
++ fn get_vocab_size(&self, with_added_tokens: bool) -> usize {
++ self.tokenizer.get_vocab_size(with_added_tokens)
++ }
++@@ -664,7 +665,7 @@ impl PyTokenizer {
++ /// direction (:obj:`str`, defaults to :obj:`right`):
++ /// Truncate direction
++ #[args(kwargs = "**")]
++- #[text_signature = "(self, max_length, stride=0, strategy='longest_first', direction='right')"]
+++ #[pyo3(text_signature = "(self, max_length, stride=0, strategy='longest_first', direction='right')")]
++ fn enable_truncation(&mut self, max_length: usize, kwargs: Option<&PyDict>) -> PyResult<()> {
++ let mut params = TruncationParams {
++ max_length,
++@@ -714,7 +715,7 @@ impl PyTokenizer {
++ }
++
++ /// Disable truncation
++- #[text_signature = "(self)"]
+++ #[pyo3(text_signature = "(self)")]
++ fn no_truncation(&mut self) {
++ self.tokenizer.with_truncation(None);
++ }
++@@ -764,7 +765,7 @@ impl PyTokenizer {
++ /// If specified, the length at which to pad. If not specified we pad using the size of
++ /// the longest sequence in a batch.
++ #[args(kwargs = "**")]
++- #[text_signature = "(self, direction='right', pad_id=0, pad_type_id=0, pad_token='[PAD]', length=None, pad_to_multiple_of=None)"]
+++ #[pyo3(text_signature = "(self, direction='right', pad_id=0, pad_type_id=0, pad_token='[PAD]', length=None, pad_to_multiple_of=None)")]
++ fn enable_padding(&mut self, kwargs: Option<&PyDict>) -> PyResult<()> {
++ let mut params = PaddingParams::default();
++
++@@ -822,7 +823,7 @@ impl PyTokenizer {
++ }
++
++ /// Disable padding
++- #[text_signature = "(self)"]
+++ #[pyo3(text_signature = "(self)")]
++ fn no_padding(&mut self) {
++ self.tokenizer.with_padding(None);
++ }
++@@ -891,7 +892,7 @@ impl PyTokenizer {
++ /// :class:`~tokenizers.Encoding`: The encoded result
++ ///
++ #[args(pair = "None", is_pretokenized = "false", add_special_tokens = "true")]
++- #[text_signature = "(self, sequence, pair=None, is_pretokenized=False, add_special_tokens=True)"]
+++ #[pyo3(text_signature = "(self, sequence, pair=None, is_pretokenized=False, add_special_tokens=True)")]
++ fn encode(
++ &self,
++ sequence: &PyAny,
++@@ -956,7 +957,7 @@ impl PyTokenizer {
++ /// A :obj:`List` of :class:`~tokenizers.Encoding`: The encoded batch
++ ///
++ #[args(is_pretokenized = "false", add_special_tokens = "true")]
++- #[text_signature = "(self, input, is_pretokenized=False, add_special_tokens=True)"]
+++ #[pyo3(text_signature = "(self, input, is_pretokenized=False, add_special_tokens=True)")]
++ fn encode_batch(
++ &self,
++ input: Vec<&PyAny>,
++@@ -999,7 +1000,7 @@ impl PyTokenizer {
++ /// Returns:
++ /// :obj:`str`: The decoded string
++ #[args(skip_special_tokens = true)]
++- #[text_signature = "(self, ids, skip_special_tokens=True)"]
+++ #[pyo3(text_signature = "(self, ids, skip_special_tokens=True)")]
++ fn decode(&self, ids: Vec, skip_special_tokens: bool) -> PyResult {
++ ToPyResult(self.tokenizer.decode(ids, skip_special_tokens)).into()
++ }
++@@ -1016,7 +1017,7 @@ impl PyTokenizer {
++ /// Returns:
++ /// :obj:`List[str]`: A list of decoded strings
++ #[args(skip_special_tokens = true)]
++- #[text_signature = "(self, sequences, skip_special_tokens=True)"]
+++ #[pyo3(text_signature = "(self, sequences, skip_special_tokens=True)")]
++ fn decode_batch(
++ &self,
++ sequences: Vec>,
++@@ -1036,7 +1037,7 @@ impl PyTokenizer {
++ ///
++ /// Returns:
++ /// :obj:`Optional[int]`: An optional id, :obj:`None` if out of vocabulary
++- #[text_signature = "(self, token)"]
+++ #[pyo3(text_signature = "(self, token)")]
++ fn token_to_id(&self, token: &str) -> Option {
++ self.tokenizer.token_to_id(token)
++ }
++@@ -1049,7 +1050,7 @@ impl PyTokenizer {
++ ///
++ /// Returns:
++ /// :obj:`Optional[str]`: An optional token, :obj:`None` if out of vocabulary
++- #[text_signature = "(self, id)"]
+++ #[pyo3(text_signature = "(self, id)")]
++ fn id_to_token(&self, id: u32) -> Option {
++ self.tokenizer.id_to_token(id)
++ }
++@@ -1066,7 +1067,7 @@ impl PyTokenizer {
++ ///
++ /// Returns:
++ /// :obj:`int`: The number of tokens that were created in the vocabulary
++- #[text_signature = "(self, tokens)"]
+++ #[pyo3(text_signature = "(self, tokens)")]
++ fn add_tokens(&mut self, tokens: &PyList) -> PyResult {
++ let tokens = tokens
++ .into_iter()
++@@ -1103,7 +1104,7 @@ impl PyTokenizer {
++ ///
++ /// Returns:
++ /// :obj:`int`: The number of tokens that were created in the vocabulary
++- #[text_signature = "(self, tokens)"]
+++ #[pyo3(text_signature = "(self, tokens)")]
++ fn add_special_tokens(&mut self, tokens: &PyList) -> PyResult {
++ let tokens = tokens
++ .into_iter()
++@@ -1137,7 +1138,7 @@ impl PyTokenizer {
++ /// trainer (:obj:`~tokenizers.trainers.Trainer`, `optional`):
++ /// An optional trainer that should be used to train our Model
++ #[args(trainer = "None")]
++- #[text_signature = "(self, files, trainer = None)"]
+++ #[pyo3(text_signature = "(self, files, trainer = None)")]
++ fn train(&mut self, files: Vec, trainer: Option<&mut PyTrainer>) -> PyResult<()> {
++ let mut trainer =
++ trainer.map_or_else(|| self.tokenizer.get_model().get_trainer(), |t| t.clone());
++@@ -1173,7 +1174,7 @@ impl PyTokenizer {
++ /// The total number of sequences in the iterator. This is used to
++ /// provide meaningful progress tracking
++ #[args(trainer = "None", length = "None")]
++- #[text_signature = "(self, iterator, trainer=None, length=None)"]
+++ #[pyo3(text_signature = "(self, iterator, trainer=None, length=None)")]
++ fn train_from_iterator(
++ &mut self,
++ py: Python,
++@@ -1239,7 +1240,7 @@ impl PyTokenizer {
++ /// Returns:
++ /// :class:`~tokenizers.Encoding`: The final post-processed encoding
++ #[args(pair = "None", add_special_tokens = true)]
++- #[text_signature = "(self, encoding, pair=None, add_special_tokens=True)"]
+++ #[pyo3(text_signature = "(self, encoding, pair=None, add_special_tokens=True)")]
++ fn post_process(
++ &self,
++ encoding: &PyEncoding,
++diff --git a/bindings/python/src/trainers.rs b/bindings/python/src/trainers.rs
++index 7def6fc..98082dd 100644
++--- a/bindings/python/src/trainers.rs
+++++ b/bindings/python/src/trainers.rs
++@@ -15,9 +15,9 @@ use crate::utils::PyChar;
++ ///
++ /// This class is not supposed to be instantiated directly. Instead, any implementation of a
++ /// Trainer will return an instance of this class when instantiated.
++-#[pyclass(name=Trainer, module = "tokenizers.trainers", name=Trainer)]
+++#[pyclass(module = "tokenizers.trainers", name = "Trainer")]
++ #[derive(Clone)]
++-#[text_signature = "(self, vocab_size=30000, min_frequency=0,show_progress=True, special_tokens=[],limit_alphabet=None, initial_alphabet = [], continuing_subword_prefix=None, end_of_word_suffix=None)"]
+++#[pyo3(text_signature = "(self, vocab_size=30000, min_frequency=0,show_progress=True, special_tokens=[],limit_alphabet=None, initial_alphabet = [], continuing_subword_prefix=None, end_of_word_suffix=None)")]
++ pub struct PyTrainer {
++ pub trainer: Arc>,
++ }
++@@ -132,7 +132,7 @@ macro_rules! setter {
++ ///
++ /// end_of_word_suffix (:obj:`str`, `optional`):
++ /// A suffix to be used for every subword that is a end-of-word.
++-#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name=BpeTrainer)]
+++#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name = "BpeTrainer")]
++ pub struct PyBpeTrainer {}
++ #[pymethods]
++ impl PyBpeTrainer {
++@@ -335,8 +335,8 @@ impl PyBpeTrainer {
++ ///
++ /// end_of_word_suffix (:obj:`str`, `optional`):
++ /// A suffix to be used for every subword that is a end-of-word.
++-#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name=WordPieceTrainer)] ++-#[text_signature = "(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[], limit_alphabet=None, initial_alphabet= [],continuing_subword_prefix=\"##\", end_of_word_suffix=None)"] +++#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name = "WordPieceTrainer")] +++#[pyo3(text_signature = "(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[], limit_alphabet=None, initial_alphabet= [],continuing_subword_prefix=\"##\", end_of_word_suffix=None)")] ++ pub struct PyWordPieceTrainer {} ++ #[pymethods] ++ impl PyWordPieceTrainer { ++@@ -525,7 +525,7 @@ impl PyWordPieceTrainer { ++ /// ++ /// special_tokens (:obj:`List[Union[str, AddedToken]]`): ++ /// A list of special tokens the model should know of. ++-#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name=WordLevelTrainer)] +++#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name = "WordLevelTrainer")] ++ pub struct PyWordLevelTrainer {} ++ #[pymethods] ++ impl PyWordLevelTrainer { ++@@ -681,8 +681,8 @@ impl PyWordLevelTrainer { ++ /// n_sub_iterations (:obj:`int`): ++ /// The number of iterations of the EM algorithm to perform before ++ /// pruning the vocabulary. ++-#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name=UnigramTrainer)] ++-#[text_signature = "(self, vocab_size=8000, show_progress=True, special_tokens=[], shrinking_factor=0.75, unk_token=None, max_piece_length=16, n_sub_iterations=2)"] +++#[pyclass(extends=PyTrainer, module = "tokenizers.trainers", name = "UnigramTrainer")] +++#[pyo3(text_signature = "(self, vocab_size=8000, show_progress=True, special_tokens=[], shrinking_factor=0.75, unk_token=None, max_piece_length=16, n_sub_iterations=2)")] ++ pub struct PyUnigramTrainer {} ++ #[pymethods] ++ impl PyUnigramTrainer { ++diff --git a/bindings/python/src/utils/iterators.rs b/bindings/python/src/utils/iterators.rs ++index 0715df5..cf6310b 100644 ++--- a/bindings/python/src/utils/iterators.rs +++++ b/bindings/python/src/utils/iterators.rs ++@@ -1,5 +1,5 @@ ++ use pyo3::prelude::*; ++-use pyo3::{AsPyPointer, PyNativeType}; +++use pyo3::AsPyPointer; ++ use std::collections::VecDeque; ++ ++ /// An simple iterator that can be instantiated with a specified length. 
++diff --git a/bindings/python/src/utils/normalization.rs b/bindings/python/src/utils/normalization.rs
++index 39b1b73..d60d91c 100644
++--- a/bindings/python/src/utils/normalization.rs
+++++ b/bindings/python/src/utils/normalization.rs
++@@ -192,7 +192,7 @@ fn slice(
++ /// Args:
++ /// sequence: str:
++ /// The string sequence used to initialize this NormalizedString
++-#[pyclass(module = "tokenizers", name=NormalizedString)]
+++#[pyclass(module = "tokenizers", name = "NormalizedString")]
++ #[derive(Clone)]
++ pub struct PyNormalizedString {
++ pub(crate) normalized: NormalizedString,
++@@ -217,91 +217,91 @@ impl PyNormalizedString {
++ }
++
++ /// Runs the NFD normalization
++- #[text_signature = "(self)"]
+++ #[pyo3(text_signature = "(self)")]
++ fn nfd(&mut self) {
++ self.normalized.nfd();
++ }
++
++ /// Runs the NFKD normalization
++- #[text_signature = "(self)"]
+++ #[pyo3(text_signature = "(self)")]
++ fn nfkd(&mut self) {
++ self.normalized.nfkd();
++ }
++
++ /// Runs the NFC normalization
++- #[text_signature = "(self)"]
+++ #[pyo3(text_signature = "(self)")]
++ fn nfc(&mut self) {
++ self.normalized.nfc();
++ }
++
++ /// Runs the NFKC normalization
++- #[text_signature = "(self)"]
+++ #[pyo3(text_signature = "(self)")]
++ fn nfkc(&mut self) {
++ self.normalized.nfkc();
++ }
++
++ /// Lowercase the string
++- #[text_signature = "(self)"]
+++ #[pyo3(text_signature = "(self)")]
++ fn lowercase(&mut self) {
++ self.normalized.lowercase();
++ }
++
++ /// Uppercase the string
++- #[text_signature = "(self)"]
+++ #[pyo3(text_signature = "(self)")]
++ fn uppercase(&mut self) {
++ self.normalized.uppercase();
++ }
++
++ /// Prepend the given sequence to the string
++- #[text_signature = "(self, s)"]
+++ #[pyo3(text_signature = "(self, s)")]
++ fn prepend(&mut self, s: &str) {
++ self.normalized.prepend(s);
++ }
++
++ /// Append the given sequence to the string
++- #[text_signature = "(self, s)"]
+++ #[pyo3(text_signature = "(self, s)")]
++ fn append(&mut self, s: &str) {
++ self.normalized.append(s);
++ }
++
++ /// Strip the left of the string
++- #[text_signature = "(self)"]
+++ #[pyo3(text_signature = "(self)")]
++ fn lstrip(&mut self) {
++ self.normalized.lstrip();
++ }
++
++ /// Strip the right of the string
++- #[text_signature = "(self)"]
+++ #[pyo3(text_signature = "(self)")]
++ fn rstrip(&mut self) {
++ self.normalized.rstrip();
++ }
++
++ /// Strip both ends of the string
++- #[text_signature = "(self)"]
+++ #[pyo3(text_signature = "(self)")]
++ fn strip(&mut self) {
++ self.normalized.strip();
++ }
++
++ /// Clears the string
++- #[text_signature = "(self)"]
+++ #[pyo3(text_signature = "(self)")]
++ fn clear(&mut self) {
++ self.normalized.clear();
++ }
++
++ /// Slice the string using the given range
++- #[text_signature = "(self, range)"]
+++ #[pyo3(text_signature = "(self, range)")]
++ fn slice(&self, range: PyRange) -> PyResult> {
++ slice(&self.normalized, &range)
++ }
++
++ /// Filter each character of the string using the given func
++- #[text_signature = "(self, func)"]
+++ #[pyo3(text_signature = "(self, func)")]
++ fn filter(&mut self, func: &PyAny) -> PyResult<()> {
++ filter(&mut self.normalized, func)
++ }
++
++ /// Calls the given function for each character of the string
++- #[text_signature = "(self, func)"]
+++ #[pyo3(text_signature = "(self, func)")]
++ fn for_each(&self, func: &PyAny) -> PyResult<()> {
++ for_each(&self.normalized, func)
++ }
++@@ -310,7 +310,7 @@ impl PyNormalizedString {
++ ///
++ /// Replaces each character of the string using the returned value. Each
++ /// returned value **must** be a str of length 1 (ie a character).
++- #[text_signature = "(self, func)"]
+++ #[pyo3(text_signature = "(self, func)")]
++ fn map(&mut self, func: &PyAny) -> PyResult<()> {
++ map(&mut self.normalized, func)
++ }
++@@ -328,7 +328,7 @@ impl PyNormalizedString {
++ ///
++ /// Returns:
++ /// A list of NormalizedString, representing each split
++- #[text_signature = "(self, pattern, behavior)"]
+++ #[pyo3(text_signature = "(self, pattern, behavior)")]
++ fn split(
++ &mut self,
++ pattern: PyPattern,
++@@ -349,7 +349,7 @@ impl PyNormalizedString {
++ ///
++ /// content: str:
++ /// The content to be used as replacement
++- #[text_signature = "(self, pattern, content)"]
+++ #[pyo3(text_signature = "(self, pattern, content)")]
++ fn replace(&mut self, pattern: PyPattern, content: &str) -> PyResult<()> {
++ ToPyResult(self.normalized.replace(pattern, content)).into()
++ }
++@@ -389,7 +389,7 @@ impl From for NormalizedString {
++ }
++ }
++
++-#[pyclass(module = "tokenizers", name=NormalizedStringRefMut)]
+++#[pyclass(module = "tokenizers", name = "NormalizedStringRefMut")]
++ #[derive(Clone)]
++ pub struct PyNormalizedStringRefMut {
++ inner: RefMutContainer,
++diff --git a/bindings/python/src/utils/pretokenization.rs b/bindings/python/src/utils/pretokenization.rs
++index b4d5a66..fb692c7 100644
++--- a/bindings/python/src/utils/pretokenization.rs
+++++ b/bindings/python/src/utils/pretokenization.rs
++@@ -147,8 +147,8 @@ fn to_encoding(
++ /// Args:
++ /// sequence: str:
++ /// The string sequence used to initialize this PreTokenizedString
++-#[pyclass(module = "tokenizers", name=PreTokenizedString)]
++-#[text_signature = "(self, sequence)"]
+++#[pyclass(module = "tokenizers", name = "PreTokenizedString")]
+++#[pyo3(text_signature = "(self, sequence)")]
++ pub struct PyPreTokenizedString {
++ pub(crate) pretok: tk::PreTokenizedString,
++ }
++@@ -182,7 +182,7 @@ impl PyPreTokenizedString {
++ /// just return it directly.
++ /// In order for the offsets to be tracked accurately, any returned `NormalizedString`
++ /// should come from calling either `.split` or `.slice` on the received one.
++- #[text_signature = "(self, func)"]
+++ #[pyo3(text_signature = "(self, func)")]
++ fn split(&mut self, func: &PyAny) -> PyResult<()> {
++ split(&mut self.pretok, func)
++ }
++@@ -194,7 +194,7 @@ impl PyPreTokenizedString {
++ /// The function used to normalize each underlying split. This function
++ /// does not need to return anything, just calling the methods on the provided
++ /// NormalizedString allow its modification.
++- #[text_signature = "(self, func)"]
+++ #[pyo3(text_signature = "(self, func)")]
++ fn normalize(&mut self, func: &PyAny) -> PyResult<()> {
++ normalize(&mut self.pretok, func)
++ }
++@@ -205,7 +205,7 @@ impl PyPreTokenizedString {
++ /// Args:
++ /// func: Callable[[str], List[Token]]:
++ /// The function used to tokenize each underlying split. This function must return
++ /// a list of Token generated from the input str.
++- #[text_signature = "(self, func)"] +++ #[pyo3(text_signature = "(self, func)")] ++ fn tokenize(&mut self, func: &PyAny) -> PyResult<()> { ++ tokenize(&mut self.pretok, func) ++ } ++@@ -224,7 +224,7 @@ impl PyPreTokenizedString { ++ /// Returns: ++ /// An Encoding ++ #[args(type_id = "0", word_idx = "None")] ++- #[text_signature = "(self, type_id=0, word_idx=None)"] +++ #[pyo3(text_signature = "(self, type_id=0, word_idx=None)")] ++ fn to_encoding(&self, type_id: u32, word_idx: Option) -> PyResult { ++ to_encoding(&self.pretok, type_id, word_idx) ++ } ++@@ -249,7 +249,7 @@ impl PyPreTokenizedString { ++ offset_referential = "PyOffsetReferential(OffsetReferential::Original)", ++ offset_type = "PyOffsetType(OffsetType::Char)" ++ )] ++- #[text_signature = "(self, offset_referential=\"original\", offset_type=\"char\")"] +++ #[pyo3(text_signature = "(self, offset_referential=\"original\", offset_type=\"char\")")] ++ fn get_splits( ++ &self, ++ offset_referential: PyOffsetReferential, ++@@ -259,7 +259,7 @@ impl PyPreTokenizedString { ++ } ++ } ++ ++-#[pyclass(module = "tokenizers", name=PreTokenizedString)] +++#[pyclass(module = "tokenizers", name = "PreTokenizedString")] ++ #[derive(Clone)] ++ pub struct PyPreTokenizedStringRefMut { ++ inner: RefMutContainer, ++diff --git a/bindings/python/src/utils/regex.rs b/bindings/python/src/utils/regex.rs ++index 8170ffc..9e0d424 100644 ++--- a/bindings/python/src/utils/regex.rs +++++ b/bindings/python/src/utils/regex.rs ++@@ -3,8 +3,8 @@ use pyo3::exceptions; ++ use pyo3::prelude::*; ++ ++ /// Instantiate a new Regex with the given pattern ++-#[pyclass(module = "tokenizers", name=Regex)] ++-#[text_signature = "(self, pattern)"] +++#[pyclass(module = "tokenizers", name = "Regex")] +++#[pyo3(text_signature = "(self, pattern)")] ++ pub struct PyRegex { ++ pub inner: Regex, ++ pub pattern: String, ++-- ++2.35.1.windows.2 + -+[target.aarch64-apple-darwin] -+rustflags = [ -+ "-C", "link-arg=-undefined", -+ "-C", "link-arg=dynamic_lookup", -+] diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 823f4a2..286cd68 100644 --- a/bindings/python/Cargo.lock