diff --git a/poetry.lock b/poetry.lock index c9a078f955..68f21010e0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -60,22 +60,22 @@ files = [ [[package]] name = "attrs" -version = "22.2.0" +version = "23.1.0" description = "Classes Without Boilerplate" category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, - {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, ] [package.extras] -cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] -dev = ["attrs[docs,tests]"] -docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] -tests = ["attrs[tests-no-zope]", "zope.interface"] -tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] +cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] [[package]] name = "black" @@ -122,6 +122,18 @@ d = ["aiohttp (>=3.7.4)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "blinker" +version = "1.6.2" +description = "Fast, simple object-to-object and broadcast signaling" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "blinker-1.6.2-py3-none-any.whl", hash = "sha256:c3d739772abb7bc2860abf5f2ec284223d9ad5c76da018234f6f50d6f31ab1f0"}, + {file = "blinker-1.6.2.tar.gz", hash = "sha256:4afd3de66ef3a9f8067559fb7a1cbe555c17dcbe15971b05d1b625c3e7abe213"}, +] + [[package]] name = "blis" version = "0.7.9" @@ -487,31 +499,31 @@ toml = ["tomli"] [[package]] name = "cryptography" -version = "40.0.1" +version = "40.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
category = "dev" optional = false python-versions = ">=3.6" files = [ - {file = "cryptography-40.0.1-cp36-abi3-macosx_10_12_universal2.whl", hash = "sha256:918cb89086c7d98b1b86b9fdb70c712e5a9325ba6f7d7cfb509e784e0cfc6917"}, - {file = "cryptography-40.0.1-cp36-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9618a87212cb5200500e304e43691111570e1f10ec3f35569fdfcd17e28fd797"}, - {file = "cryptography-40.0.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a4805a4ca729d65570a1b7cac84eac1e431085d40387b7d3bbaa47e39890b88"}, - {file = "cryptography-40.0.1-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63dac2d25c47f12a7b8aa60e528bfb3c51c5a6c5a9f7c86987909c6c79765554"}, - {file = "cryptography-40.0.1-cp36-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0a4e3406cfed6b1f6d6e87ed243363652b2586b2d917b0609ca4f97072994405"}, - {file = "cryptography-40.0.1-cp36-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1e0af458515d5e4028aad75f3bb3fe7a31e46ad920648cd59b64d3da842e4356"}, - {file = "cryptography-40.0.1-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d8aa3609d337ad85e4eb9bb0f8bcf6e4409bfb86e706efa9a027912169e89122"}, - {file = "cryptography-40.0.1-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:cf91e428c51ef692b82ce786583e214f58392399cf65c341bc7301d096fa3ba2"}, - {file = "cryptography-40.0.1-cp36-abi3-win32.whl", hash = "sha256:650883cc064297ef3676b1db1b7b1df6081794c4ada96fa457253c4cc40f97db"}, - {file = "cryptography-40.0.1-cp36-abi3-win_amd64.whl", hash = "sha256:a805a7bce4a77d51696410005b3e85ae2839bad9aa38894afc0aa99d8e0c3160"}, - {file = "cryptography-40.0.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cd033d74067d8928ef00a6b1327c8ea0452523967ca4463666eeba65ca350d4c"}, - {file = "cryptography-40.0.1-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d36bbeb99704aabefdca5aee4eba04455d7a27ceabd16f3b3ba9bdcc31da86c4"}, - {file = "cryptography-40.0.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:32057d3d0ab7d4453778367ca43e99ddb711770477c4f072a51b3ca69602780a"}, - {file = "cryptography-40.0.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:f5d7b79fa56bc29580faafc2ff736ce05ba31feaa9d4735048b0de7d9ceb2b94"}, - {file = "cryptography-40.0.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7c872413353c70e0263a9368c4993710070e70ab3e5318d85510cc91cce77e7c"}, - {file = "cryptography-40.0.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:28d63d75bf7ae4045b10de5413fb1d6338616e79015999ad9cf6fc538f772d41"}, - {file = "cryptography-40.0.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6f2bbd72f717ce33100e6467572abaedc61f1acb87b8d546001328d7f466b778"}, - {file = "cryptography-40.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cc3a621076d824d75ab1e1e530e66e7e8564e357dd723f2533225d40fe35c60c"}, - {file = "cryptography-40.0.1.tar.gz", hash = "sha256:2803f2f8b1e95f614419926c7e6f55d828afc614ca5ed61543877ae668cc3472"}, + {file = "cryptography-40.0.2-cp36-abi3-macosx_10_12_universal2.whl", hash = "sha256:8f79b5ff5ad9d3218afb1e7e20ea74da5f76943ee5edb7f76e56ec5161ec782b"}, + {file = "cryptography-40.0.2-cp36-abi3-macosx_10_12_x86_64.whl", hash = "sha256:05dc219433b14046c476f6f09d7636b92a1c3e5808b9a6536adf4932b3b2c440"}, + {file = "cryptography-40.0.2-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4df2af28d7bedc84fe45bd49bc35d710aede676e2a4cb7fc6d103a2adc8afe4d"}, + {file = "cryptography-40.0.2-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0dcca15d3a19a66e63662dc8d30f8036b07be851a8680eda92d079868f106288"}, + {file = "cryptography-40.0.2-cp36-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:a04386fb7bc85fab9cd51b6308633a3c271e3d0d3eae917eebab2fac6219b6d2"}, + {file = "cryptography-40.0.2-cp36-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:adc0d980fd2760c9e5de537c28935cc32b9353baaf28e0814df417619c6c8c3b"}, + {file = "cryptography-40.0.2-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d5a1bd0e9e2031465761dfa920c16b0065ad77321d8a8c1f5ee331021fda65e9"}, + {file = "cryptography-40.0.2-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:a95f4802d49faa6a674242e25bfeea6fc2acd915b5e5e29ac90a32b1139cae1c"}, + {file = "cryptography-40.0.2-cp36-abi3-win32.whl", hash = "sha256:aecbb1592b0188e030cb01f82d12556cf72e218280f621deed7d806afd2113f9"}, + {file = "cryptography-40.0.2-cp36-abi3-win_amd64.whl", hash = "sha256:b12794f01d4cacfbd3177b9042198f3af1c856eedd0a98f10f141385c809a14b"}, + {file = "cryptography-40.0.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:142bae539ef28a1c76794cca7f49729e7c54423f615cfd9b0b1fa90ebe53244b"}, + {file = "cryptography-40.0.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:956ba8701b4ffe91ba59665ed170a2ebbdc6fc0e40de5f6059195d9f2b33ca0e"}, + {file = "cryptography-40.0.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4f01c9863da784558165f5d4d916093737a75203a5c5286fde60e503e4276c7a"}, + {file = "cryptography-40.0.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:3daf9b114213f8ba460b829a02896789751626a2a4e7a43a28ee77c04b5e4958"}, + {file = "cryptography-40.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:48f388d0d153350f378c7f7b41497a54ff1513c816bcbbcafe5b829e59b9ce5b"}, + {file = "cryptography-40.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c0764e72b36a3dc065c155e5b22f93df465da9c39af65516fe04ed3c68c92636"}, + {file = "cryptography-40.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:cbaba590180cba88cb99a5f76f90808a624f18b169b90a4abb40c1fd8c19420e"}, + {file = "cryptography-40.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7a38250f433cd41df7fcb763caa3ee9362777fdb4dc642b9a349721d2bf47404"}, + {file = "cryptography-40.0.2.tar.gz", hash = "sha256:c33c0d32b8594fa647d2e01dbccc303478e16fdd7cf98652d5b3ed11aa5e5c99"}, ] [package.dependencies] @@ -606,14 +618,14 @@ files = [ [[package]] name = "domdf-python-tools" -version = "3.6.0" +version = "3.6.1" description = "Helpful functions for Python 🐍 🛠️" category = "dev" optional = false python-versions = ">=3.6" files = [ - {file = "domdf_python_tools-3.6.0-py3-none-any.whl", hash = "sha256:7a0a3b2c716854465b09b5c0c5f53d41f37562c5a0cd8746cd042ad7955430f1"}, - {file = "domdf_python_tools-3.6.0.tar.gz", hash = "sha256:0ac5efa2ac648dca5653e386fe73aa995e66b215c9d16b7ee87e931322a1e6c8"}, + {file = "domdf_python_tools-3.6.1-py3-none-any.whl", hash = "sha256:e18158460850957f18e740eb94ede56f580ddb0cb162ab9d9834ed8bbb1b6431"}, + {file = "domdf_python_tools-3.6.1.tar.gz", hash = "sha256:acc04563d23bce4d437dd08af6b9bea788328c412772a044d8ca428a7ad861be"}, ] [package.dependencies] @@ -714,14 +726,14 @@ peewee = "*" [[package]] name = "faker" -version = "18.4.0" +version = "18.5.1" description = "Faker is a Python package that generates fake data for you." 
category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "Faker-18.4.0-py3-none-any.whl", hash = "sha256:170ead9d0d140916168b142df69c44722b8f622ced2070802d0af9c476f0cb84"}, - {file = "Faker-18.4.0.tar.gz", hash = "sha256:977ad0b7aa7a61ed57287d6a0723a827e9d3dd1f8cc82aaf08707f281b33bacc"}, + {file = "Faker-18.5.1-py3-none-any.whl", hash = "sha256:137c6667583b0b458599b11305eed5a486e3932a14cb792b2b5b82ad1ad1a430"}, + {file = "Faker-18.5.1.tar.gz", hash = "sha256:64e9ab619d75684cc0593aa9f336170b0b58fa77c07fc0ebc7b2b1258e53b67d"}, ] [package.dependencies] @@ -786,19 +798,19 @@ test = ["pytest"] [[package]] name = "filelock" -version = "3.11.0" +version = "3.12.0" description = "A platform independent file lock." category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "filelock-3.11.0-py3-none-any.whl", hash = "sha256:f08a52314748335c6460fc8fe40cd5638b85001225db78c2aa01c8c0db83b318"}, - {file = "filelock-3.11.0.tar.gz", hash = "sha256:3618c0da67adcc0506b015fd11ef7faf1b493f0b40d87728e19986b536890c37"}, + {file = "filelock-3.12.0-py3-none-any.whl", hash = "sha256:ad98852315c2ab702aeb628412cbf7e95b7ce8c3bf9565670b4eaecf1db370a9"}, + {file = "filelock-3.12.0.tar.gz", hash = "sha256:fc03ae43288c013d2ea83c8597001b1129db351aad9c57fe2409327916b8e718"}, ] [package.extras] -docs = ["furo (>=2023.3.27)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.2.2)", "diff-cover (>=7.5)", "pytest (>=7.2.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"] +docs = ["furo (>=2023.3.27)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "diff-cover (>=7.5)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"] [[package]] name = "flake8" @@ -852,35 +864,25 @@ files = [ domdf-python-tools = ">=2.0.0" flake8 = ">=3.8.4" -[[package]] -name = "flashtext" -version = "2.7" -description = "Extract/Replaces keywords in sentences." -category = "main" -optional = false -python-versions = "*" -files = [ - {file = "flashtext-2.7.tar.gz", hash = "sha256:a1be2b93e09d4f0deee4aad72b91a7127b61fb8b8034ca9a9c78ea745d8b05cf"}, -] - [[package]] name = "flask" -version = "2.2.3" +version = "2.3.1" description = "A simple framework for building complex web applications." 
category = "dev" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "Flask-2.2.3-py3-none-any.whl", hash = "sha256:c0bec9477df1cb867e5a67c9e1ab758de9cb4a3e52dd70681f59fa40a62b3f2d"}, - {file = "Flask-2.2.3.tar.gz", hash = "sha256:7eb373984bf1c770023fce9db164ed0c3353cd0b53f130f4693da0ca756a2e6d"}, + {file = "Flask-2.3.1-py3-none-any.whl", hash = "sha256:8ba2a854608fdd603b67dccd4514a46450132227fb9df40127a8d0c1de8769ec"}, + {file = "Flask-2.3.1.tar.gz", hash = "sha256:a6059db4297106e5a64b3215fa16ae641822c1cb97ecb498573549b2478602cb"}, ] [package.dependencies] -click = ">=8.0" +blinker = ">=1.6.2" +click = ">=8.1.3" importlib-metadata = {version = ">=3.6.0", markers = "python_version < \"3.10\""} -itsdangerous = ">=2.0" -Jinja2 = ">=3.0" -Werkzeug = ">=2.2.2" +itsdangerous = ">=2.1.2" +Jinja2 = ">=3.1.2" +Werkzeug = ">=2.3.0" [package.extras] async = ["asgiref (>=3.2)"] @@ -912,6 +914,42 @@ ufo = ["fs (>=2.2.0,<3)"] unicode = ["unicodedata2 (>=15.0.0)"] woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] +[[package]] +name = "fsspec" +version = "2023.4.0" +description = "File-system specification" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fsspec-2023.4.0-py3-none-any.whl", hash = "sha256:f398de9b49b14e9d84d2c2d11b7b67121bc072fe97b930c4e5668ac3917d8307"}, + {file = "fsspec-2023.4.0.tar.gz", hash = "sha256:bf064186cd8808f0b2f6517273339ba0a0c8fb1b7048991c28bc67f58b8b67cd"}, +] + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +arrow = ["pyarrow (>=1)"] +dask = ["dask", "distributed"] +devel = ["pytest", "pytest-cov"] +dropbox = ["dropbox", "dropboxdrivefs", "requests"] +full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] +fuse = ["fusepy"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +gui = ["panel"] +hdfs = ["pyarrow (>=1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] +libarchive = ["libarchive-c"] +oci = ["ocifs"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] +tqdm = ["tqdm"] + [[package]] name = "ghp-import" version = "2.1.0" @@ -932,61 +970,61 @@ dev = ["flake8", "markdown", "twine", "wheel"] [[package]] name = "grpcio" -version = "1.53.0" +version = "1.54.0" description = "HTTP/2-based RPC framework" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "grpcio-1.53.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:752d2949b40e12e6ad3ed8cc552a65b54d226504f6b1fb67cab2ccee502cc06f"}, - {file = "grpcio-1.53.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:8a48fd3a7222be226bb86b7b413ad248f17f3101a524018cdc4562eeae1eb2a3"}, - {file = "grpcio-1.53.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:f3e837d29f0e1b9d6e7b29d569e2e9b0da61889e41879832ea15569c251c303a"}, - {file = "grpcio-1.53.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aef7d30242409c3aa5839b501e877e453a2c8d3759ca8230dd5a21cda029f046"}, - {file = "grpcio-1.53.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6f90698b5d1c5dd7b3236cd1fa959d7b80e17923f918d5be020b65f1c78b173"}, - {file = "grpcio-1.53.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a96c3c7f564b263c5d7c0e49a337166c8611e89c4c919f66dba7b9a84abad137"}, - {file = "grpcio-1.53.0-cp310-cp310-musllinux_1_1_x86_64.whl", 
hash = "sha256:ee81349411648d1abc94095c68cd25e3c2812e4e0367f9a9355be1e804a5135c"}, - {file = "grpcio-1.53.0-cp310-cp310-win32.whl", hash = "sha256:fdc6191587de410a184550d4143e2b24a14df495c86ca15e59508710681690ac"}, - {file = "grpcio-1.53.0-cp310-cp310-win_amd64.whl", hash = "sha256:658ffe1e39171be00490db5bd3b966f79634ac4215a1eb9a85c6cd6783bf7f6e"}, - {file = "grpcio-1.53.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:1b172e6d497191940c4b8d75b53de82dc252e15b61de2951d577ec5b43316b29"}, - {file = "grpcio-1.53.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:82434ba3a5935e47908bc861ce1ebc43c2edfc1001d235d6e31e5d3ed55815f7"}, - {file = "grpcio-1.53.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:1c734a2d4843e4e14ececf5600c3c4750990ec319e1299db7e4f0d02c25c1467"}, - {file = "grpcio-1.53.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b6a2ead3de3b2d53119d473aa2f224030257ef33af1e4ddabd4afee1dea5f04c"}, - {file = "grpcio-1.53.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a34d6e905f071f9b945cabbcc776e2055de1fdb59cd13683d9aa0a8f265b5bf9"}, - {file = "grpcio-1.53.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eaf8e3b97caaf9415227a3c6ca5aa8d800fecadd526538d2bf8f11af783f1550"}, - {file = "grpcio-1.53.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:da95778d37be8e4e9afca771a83424f892296f5dfb2a100eda2571a1d8bbc0dc"}, - {file = "grpcio-1.53.0-cp311-cp311-win32.whl", hash = "sha256:e4f513d63df6336fd84b74b701f17d1bb3b64e9d78a6ed5b5e8a198bbbe8bbfa"}, - {file = "grpcio-1.53.0-cp311-cp311-win_amd64.whl", hash = "sha256:ddb2511fbbb440ed9e5c9a4b9b870f2ed649b7715859fd6f2ebc585ee85c0364"}, - {file = "grpcio-1.53.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:2a912397eb8d23c177d6d64e3c8bc46b8a1c7680b090d9f13a640b104aaec77c"}, - {file = "grpcio-1.53.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:55930c56b8f5b347d6c8c609cc341949a97e176c90f5cbb01d148d778f3bbd23"}, - {file = "grpcio-1.53.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:6601d812105583948ab9c6e403a7e2dba6e387cc678c010e74f2d6d589d1d1b3"}, - {file = "grpcio-1.53.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c705e0c21acb0e8478a00e7e773ad0ecdb34bd0e4adc282d3d2f51ba3961aac7"}, - {file = "grpcio-1.53.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba074af9ca268ad7b05d3fc2b920b5fb3c083da94ab63637aaf67f4f71ecb755"}, - {file = "grpcio-1.53.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:14817de09317dd7d3fbc8272864288320739973ef0f4b56bf2c0032349da8cdf"}, - {file = "grpcio-1.53.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c7ad9fbedb93f331c2e9054e202e95cf825b885811f1bcbbdfdc301e451442db"}, - {file = "grpcio-1.53.0-cp37-cp37m-win_amd64.whl", hash = "sha256:dad5b302a4c21c604d88a5d441973f320134e6ff6a84ecef9c1139e5ffd466f6"}, - {file = "grpcio-1.53.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:fa8eaac75d3107e3f5465f2c9e3bbd13db21790c6e45b7de1756eba16b050aca"}, - {file = "grpcio-1.53.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:104a2210edd3776c38448b4f76c2f16e527adafbde171fc72a8a32976c20abc7"}, - {file = "grpcio-1.53.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:dbc1ba968639c1d23476f75c356e549e7bbf2d8d6688717dcab5290e88e8482b"}, - {file = "grpcio-1.53.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95952d3fe795b06af29bb8ec7bbf3342cdd867fc17b77cc25e6733d23fa6c519"}, - {file = "grpcio-1.53.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:f144a790f14c51b8a8e591eb5af40507ffee45ea6b818c2482f0457fec2e1a2e"}, - {file = "grpcio-1.53.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0698c094688a2dd4c7c2f2c0e3e142cac439a64d1cef6904c97f6cde38ba422f"}, - {file = "grpcio-1.53.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6b6d60b0958be711bab047e9f4df5dbbc40367955f8651232bfdcdd21450b9ab"}, - {file = "grpcio-1.53.0-cp38-cp38-win32.whl", hash = "sha256:1948539ce78805d4e6256ab0e048ec793956d54787dc9d6777df71c1d19c7f81"}, - {file = "grpcio-1.53.0-cp38-cp38-win_amd64.whl", hash = "sha256:df9ba1183b3f649210788cf80c239041dddcb375d6142d8bccafcfdf549522cd"}, - {file = "grpcio-1.53.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:19caa5b7282a89b799e63776ff602bb39604f7ca98db6df27e2de06756ae86c3"}, - {file = "grpcio-1.53.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:b5bd026ac928c96cc23149e6ef79183125542062eb6d1ccec34c0a37e02255e7"}, - {file = "grpcio-1.53.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:7dc8584ca6c015ad82e186e82f4c0fe977394588f66b8ecfc4ec873285314619"}, - {file = "grpcio-1.53.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2eddaae8af625e45b5c8500dcca1043264d751a6872cde2eda5022df8a336959"}, - {file = "grpcio-1.53.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5fb6f3d7824696c1c9f2ad36ddb080ba5a86f2d929ef712d511b4d9972d3d27"}, - {file = "grpcio-1.53.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8270d1dc2c98ab57e6dbf36fa187db8df4c036f04a398e5d5e25b4e01a766d70"}, - {file = "grpcio-1.53.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:976a7f24eb213e8429cab78d5e120500dfcdeb01041f1f5a77b17b9101902615"}, - {file = "grpcio-1.53.0-cp39-cp39-win32.whl", hash = "sha256:9c84a481451e7174f3a764a44150f93b041ab51045aa33d7b5b68b6979114e48"}, - {file = "grpcio-1.53.0-cp39-cp39-win_amd64.whl", hash = "sha256:6beb84f83360ff29a3654f43f251ec11b809dcb5524b698d711550243debd289"}, - {file = "grpcio-1.53.0.tar.gz", hash = "sha256:a4952899b4931a6ba12951f9a141ef3e74ff8a6ec9aa2dc602afa40f63595e33"}, + {file = "grpcio-1.54.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:a947d5298a0bbdd4d15671024bf33e2b7da79a70de600ed29ba7e0fef0539ebb"}, + {file = "grpcio-1.54.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:e355ee9da9c1c03f174efea59292b17a95e0b7b4d7d2a389265f731a9887d5a9"}, + {file = "grpcio-1.54.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:73c238ef6e4b64272df7eec976bb016c73d3ab5a6c7e9cd906ab700523d312f3"}, + {file = "grpcio-1.54.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c59d899ee7160638613a452f9a4931de22623e7ba17897d8e3e348c2e9d8d0b"}, + {file = "grpcio-1.54.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48cb7af77238ba16c77879009003f6b22c23425e5ee59cb2c4c103ec040638a5"}, + {file = "grpcio-1.54.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:2262bd3512ba9e9f0e91d287393df6f33c18999317de45629b7bd46c40f16ba9"}, + {file = "grpcio-1.54.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:224166f06ccdaf884bf35690bf4272997c1405de3035d61384ccb5b25a4c1ca8"}, + {file = "grpcio-1.54.0-cp310-cp310-win32.whl", hash = "sha256:ed36e854449ff6c2f8ee145f94851fe171298e1e793f44d4f672c4a0d78064e7"}, + {file = "grpcio-1.54.0-cp310-cp310-win_amd64.whl", hash = "sha256:27fb030a4589d2536daec5ff5ba2a128f4f155149efab578fe2de2cb21596d3d"}, + {file = "grpcio-1.54.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:f4a7dca8ccd8023d916b900aa3c626f1bd181bd5b70159479b142f957ff420e4"}, + {file = 
"grpcio-1.54.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:1209d6b002b26e939e4c8ea37a3d5b4028eb9555394ea69fb1adbd4b61a10bb8"}, + {file = "grpcio-1.54.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:860fcd6db7dce80d0a673a1cc898ce6bc3d4783d195bbe0e911bf8a62c93ff3f"}, + {file = "grpcio-1.54.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3930669c9e6f08a2eed824738c3d5699d11cd47a0ecc13b68ed11595710b1133"}, + {file = "grpcio-1.54.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62117486460c83acd3b5d85c12edd5fe20a374630475388cfc89829831d3eb79"}, + {file = "grpcio-1.54.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e3e526062c690517b42bba66ffe38aaf8bc99a180a78212e7b22baa86902f690"}, + {file = "grpcio-1.54.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ebff0738be0499d7db74d20dca9f22a7b27deae31e1bf92ea44924fd69eb6251"}, + {file = "grpcio-1.54.0-cp311-cp311-win32.whl", hash = "sha256:21c4a1aae861748d6393a3ff7867473996c139a77f90326d9f4104bebb22d8b8"}, + {file = "grpcio-1.54.0-cp311-cp311-win_amd64.whl", hash = "sha256:3db71c6f1ab688d8dfc102271cedc9828beac335a3a4372ec54b8bf11b43fd29"}, + {file = "grpcio-1.54.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:960b176e0bb2b4afeaa1cd2002db1e82ae54c9b6e27ea93570a42316524e77cf"}, + {file = "grpcio-1.54.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:d8ae6e0df3a608e99ee1acafaafd7db0830106394d54571c1ece57f650124ce9"}, + {file = "grpcio-1.54.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:c33744d0d1a7322da445c0fe726ea6d4e3ef2dfb0539eadf23dce366f52f546c"}, + {file = "grpcio-1.54.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d109df30641d050e009105f9c9ca5a35d01e34d2ee2a4e9c0984d392fd6d704"}, + {file = "grpcio-1.54.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:775a2f70501370e5ba54e1ee3464413bff9bd85bd9a0b25c989698c44a6fb52f"}, + {file = "grpcio-1.54.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c55a9cf5cba80fb88c850915c865b8ed78d5e46e1f2ec1b27692f3eaaf0dca7e"}, + {file = "grpcio-1.54.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1fa7d6ddd33abbd3c8b3d7d07c56c40ea3d1891ce3cd2aa9fa73105ed5331866"}, + {file = "grpcio-1.54.0-cp37-cp37m-win_amd64.whl", hash = "sha256:ed3d458ded32ff3a58f157b60cc140c88f7ac8c506a1c567b2a9ee8a2fd2ce54"}, + {file = "grpcio-1.54.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:5942a3e05630e1ef5b7b5752e5da6582460a2e4431dae603de89fc45f9ec5aa9"}, + {file = "grpcio-1.54.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:125ed35aa3868efa82eabffece6264bf638cfdc9f0cd58ddb17936684aafd0f8"}, + {file = "grpcio-1.54.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:b7655f809e3420f80ce3bf89737169a9dce73238af594049754a1128132c0da4"}, + {file = "grpcio-1.54.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87f47bf9520bba4083d65ab911f8f4c0ac3efa8241993edd74c8dd08ae87552f"}, + {file = "grpcio-1.54.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16bca8092dd994f2864fdab278ae052fad4913f36f35238b2dd11af2d55a87db"}, + {file = "grpcio-1.54.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d2f62fb1c914a038921677cfa536d645cb80e3dd07dc4859a3c92d75407b90a5"}, + {file = "grpcio-1.54.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a7caf553ccaf715ec05b28c9b2ab2ee3fdb4036626d779aa09cf7cbf54b71445"}, + {file = "grpcio-1.54.0-cp38-cp38-win32.whl", hash = "sha256:2585b3c294631a39b33f9f967a59b0fad23b1a71a212eba6bc1e3ca6e6eec9ee"}, 
+ {file = "grpcio-1.54.0-cp38-cp38-win_amd64.whl", hash = "sha256:3b170e441e91e4f321e46d3cc95a01cb307a4596da54aca59eb78ab0fc03754d"}, + {file = "grpcio-1.54.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:1382bc499af92901c2240c4d540c74eae8a671e4fe9839bfeefdfcc3a106b5e2"}, + {file = "grpcio-1.54.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:031bbd26656e0739e4b2c81c172155fb26e274b8d0312d67aefc730bcba915b6"}, + {file = "grpcio-1.54.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:a97b0d01ae595c997c1d9d8249e2d2da829c2d8a4bdc29bb8f76c11a94915c9a"}, + {file = "grpcio-1.54.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:533eaf5b2a79a3c6f35cbd6a095ae99cac7f4f9c0e08bdcf86c130efd3c32adf"}, + {file = "grpcio-1.54.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49eace8ea55fbc42c733defbda1e4feb6d3844ecd875b01bb8b923709e0f5ec8"}, + {file = "grpcio-1.54.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:30fbbce11ffeb4f9f91c13fe04899aaf3e9a81708bedf267bf447596b95df26b"}, + {file = "grpcio-1.54.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:650f5f2c9ab1275b4006707411bb6d6bc927886874a287661c3c6f332d4c068b"}, + {file = "grpcio-1.54.0-cp39-cp39-win32.whl", hash = "sha256:02000b005bc8b72ff50c477b6431e8886b29961159e8b8d03c00b3dd9139baed"}, + {file = "grpcio-1.54.0-cp39-cp39-win_amd64.whl", hash = "sha256:6dc1e2c9ac292c9a484ef900c568ccb2d6b4dfe26dfa0163d5bc815bb836c78d"}, + {file = "grpcio-1.54.0.tar.gz", hash = "sha256:eb0807323572642ab73fd86fe53d88d843ce617dd1ddf430351ad0759809a0ae"}, ] [package.extras] -protobuf = ["grpcio-tools (>=1.53.0)"] +protobuf = ["grpcio-tools (>=1.54.0)"] [[package]] name = "gunicorn" @@ -1011,18 +1049,19 @@ tornado = ["tornado (>=0.2)"] [[package]] name = "huggingface-hub" -version = "0.13.4" +version = "0.14.1" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" category = "main" optional = false python-versions = ">=3.7.0" files = [ - {file = "huggingface_hub-0.13.4-py3-none-any.whl", hash = "sha256:4d3d40593de6673d624a4baaaf249b9bf5165bfcafd1ad58de361931f0b4fda5"}, - {file = "huggingface_hub-0.13.4.tar.gz", hash = "sha256:db83d9c2f76aed8cf49893ffadd6be24e82074da2f64b1d36b8ba40eb255e115"}, + {file = "huggingface_hub-0.14.1-py3-none-any.whl", hash = "sha256:9fc619170d800ff3793ad37c9757c255c8783051e1b5b00501205eb43ccc4f27"}, + {file = "huggingface_hub-0.14.1.tar.gz", hash = "sha256:9ab899af8e10922eac65e290d60ab956882ab0bf643e3d990b1394b6b47b7fbc"}, ] [package.dependencies] filelock = "*" +fsspec = "*" packaging = ">=20.9" pyyaml = ">=5.1" requests = "*" @@ -1030,26 +1069,26 @@ tqdm = ">=4.42.1" typing-extensions = ">=3.7.4.3" [package.extras] -all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] cli = ["InquirerPy (==0.3.4)"] -dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", 
"types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"] tensorflow = ["graphviz", "pydot", "tensorflow"] -testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "jedi", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "soundfile"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "gradio", "jedi", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "soundfile"] torch = ["torch"] typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] [[package]] name = "identify" -version = "2.5.22" +version = "2.5.23" description = "File identification library for Python" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "identify-2.5.22-py2.py3-none-any.whl", hash = "sha256:f0faad595a4687053669c112004178149f6c326db71ee999ae4636685753ad2f"}, - {file = "identify-2.5.22.tar.gz", hash = "sha256:f7a93d6cf98e29bd07663c60728e7a4057615068d7a639d132dc883b2d54d31e"}, + {file = "identify-2.5.23-py2.py3-none-any.whl", hash = "sha256:17d9351c028a781456965e781ed2a435755cac655df1ebd930f7186b54399312"}, + {file = "identify-2.5.23.tar.gz", hash = "sha256:50b01b9d5f73c6b53e5fa2caf9f543d3e657a9d0bbdeb203ebb8d45960ba7433"}, ] [package.extras] @@ -1069,14 +1108,14 @@ files = [ [[package]] name = "importlib-metadata" -version = "6.3.0" +version = "6.6.0" description = "Read metadata from Python packages" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "importlib_metadata-6.3.0-py3-none-any.whl", hash = "sha256:8f8bd2af397cf33bd344d35cfe7f489219b7d14fc79a3f854b75b8417e9226b0"}, - {file = "importlib_metadata-6.3.0.tar.gz", hash = "sha256:23c2bcae4762dfb0bbe072d358faec24957901d75b6c4ab11172c0c982532402"}, + {file = "importlib_metadata-6.6.0-py3-none-any.whl", hash = "sha256:43dd286a2cd8995d5eaef7fee2066340423b818ed3fd70adf0bad5f1fac53fed"}, + {file = "importlib_metadata-6.6.0.tar.gz", hash = "sha256:92501cdf9cc66ebd3e612f1b4f0c0765dfa42f0fa38ffb319b6bd84dd675d705"}, ] [package.dependencies] @@ -1815,14 +1854,14 @@ files = [ [[package]] name = "packaging" -version = "23.0" +version = "23.1" description = "Core utilities for Python packages" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, - {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, + {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, + {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] [[package]] @@ -1960,19 +1999,19 @@ files = [ [[package]] name = "platformdirs" -version = "3.2.0" +version = "3.4.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "platformdirs-3.2.0-py3-none-any.whl", hash = "sha256:ebe11c0d7a805086e99506aa331612429a72ca7cd52a1f0d277dc4adc20cb10e"}, - {file = "platformdirs-3.2.0.tar.gz", hash = "sha256:d5b638ca397f25f979350ff789db335903d7ea010ab28903f57b27e1b16c2b08"}, + {file = "platformdirs-3.4.0-py3-none-any.whl", hash = "sha256:01437886022decaf285d8972f9526397bfae2ac55480ed372ed6d9eca048870a"}, + {file = "platformdirs-3.4.0.tar.gz", hash = "sha256:a5e1536e5ea4b1c238a1364da17ff2993d5bd28e15600c2c8224008aff6bbcad"}, ] [package.extras] -docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.2.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] +docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] [[package]] name = "pluggy" @@ -2270,14 +2309,14 @@ files = [ [[package]] name = "pygments" -version = "2.14.0" +version = "2.15.1" description = "Pygments is a syntax highlighting package written in Python." category = "dev" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "Pygments-2.14.0-py3-none-any.whl", hash = "sha256:fa7bd7bd2771287c0de303af8bfdfc731f51bd2c6a47ab69d117138893b82717"}, - {file = "Pygments-2.14.0.tar.gz", hash = "sha256:b3ed06a9e8ac9a9aae5a6f5dbe78a8a58655d17b43b93c078f094ddc476ae297"}, + {file = "Pygments-2.15.1-py3-none-any.whl", hash = "sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1"}, + {file = "Pygments-2.15.1.tar.gz", hash = "sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c"}, ] [package.extras] @@ -2944,14 +2983,14 @@ tornado = ["tornado (>=5)"] [[package]] name = "setuptools" -version = "67.6.1" +version = "67.7.2" description = "Easily download, build, install, upgrade, and uninstall Python packages" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "setuptools-67.6.1-py3-none-any.whl", hash = "sha256:e728ca814a823bf7bf60162daf9db95b93d532948c4c0bea762ce62f60189078"}, - {file = "setuptools-67.6.1.tar.gz", hash = "sha256:257de92a9d50a60b8e22abfcbb771571fde0dbf3ec234463212027a4eeecbe9a"}, + {file = "setuptools-67.7.2-py3-none-any.whl", hash = "sha256:23aaf86b85ca52ceb801d32703f12d77517b2556af839621c641fca11287952b"}, + {file = "setuptools-67.7.2.tar.gz", hash = "sha256:f104fa03692a2602fa0fec6c6a9e63b6c8a968de13e17c026957dd1f53d80990"}, ] [package.extras] @@ -3431,15 +3470,14 @@ vision = ["Pillow"] [[package]] name = "tritonclient" -version = "2.32.0" +version = "2.33.0" description = "Python client library and utilities for communicating with Triton Inference Server" category = "main" optional = false python-versions = "*" files = [ - {file = "tritonclient-2.32.0-py3-none-any.whl", hash = "sha256:2a9077a0f1424ef521ee89a519cdbe29892a058068b195641e83ba97817f150d"}, - {file = "tritonclient-2.32.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:0431006258c0cca6ecb840d0af5b97bd40e13ec3224005dcd31da14f9d396421"}, - {file = "tritonclient-2.32.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8a1476c64c07f8873a65025bfd14d40ffc153e0966bb2b361939ad4eceec0a6a"}, + {file = "tritonclient-2.33.0-py3-none-any.whl", hash = "sha256:8fd7db59c76a6e3e4506e682a3d5ba549685b70baf7c7ff560701852774ba0f5"}, + {file = 
"tritonclient-2.33.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:92c162dac8ed25724bc80d8d8cc6dd77d8518cd6d8fa0903dae75261a609c24c"}, ] [package.dependencies] @@ -3525,14 +3563,14 @@ files = [ [[package]] name = "types-pyopenssl" -version = "23.1.0.1" +version = "23.1.0.2" description = "Typing stubs for pyOpenSSL" category = "dev" optional = false python-versions = "*" files = [ - {file = "types-pyOpenSSL-23.1.0.1.tar.gz", hash = "sha256:59044283c475eaa5a29b36a903c123d52bdf4a7e012f0a1ca0e41115b99216da"}, - {file = "types_pyOpenSSL-23.1.0.1-py3-none-any.whl", hash = "sha256:ac7fbc240930c2f9a1cbd2d04f9cb14ad0f15b0ad8d6528732a83747b1b2086e"}, + {file = "types-pyOpenSSL-23.1.0.2.tar.gz", hash = "sha256:20b80971b86240e8432a1832bd8124cea49c3088c7bfc77dfd23be27ffe4a517"}, + {file = "types_pyOpenSSL-23.1.0.2-py3-none-any.whl", hash = "sha256:b050641aeff6dfebf231ad719bdac12d53b8ee818d4afb67b886333484629957"}, ] [package.dependencies] @@ -3610,14 +3648,14 @@ files = [ [[package]] name = "types-urllib3" -version = "1.26.25.10" +version = "1.26.25.11" description = "Typing stubs for urllib3" category = "dev" optional = false python-versions = "*" files = [ - {file = "types-urllib3-1.26.25.10.tar.gz", hash = "sha256:c44881cde9fc8256d05ad6b21f50c4681eb20092552351570ab0a8a0653286d6"}, - {file = "types_urllib3-1.26.25.10-py3-none-any.whl", hash = "sha256:12c744609d588340a07e45d333bf870069fc8793bcf96bae7a96d4712a42591d"}, + {file = "types-urllib3-1.26.25.11.tar.gz", hash = "sha256:697102ddf4f781eed6f692353f40cee1098643526f5a8b99f49d2ede90fd3754"}, + {file = "types_urllib3-1.26.25.11-py3-none-any.whl", hash = "sha256:04235e792139cf3624b25d38faab593456738fbdb7439634046172e3b1339400"}, ] [[package]] @@ -3682,24 +3720,24 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "virtualenv" -version = "20.21.0" +version = "20.22.0" description = "Virtual Python Environment builder" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "virtualenv-20.21.0-py3-none-any.whl", hash = "sha256:31712f8f2a17bd06234fa97fdf19609e789dd4e3e4bf108c3da71d710651adbc"}, - {file = "virtualenv-20.21.0.tar.gz", hash = "sha256:f50e3e60f990a0757c9b68333c9fdaa72d7188caa417f96af9e52407831a3b68"}, + {file = "virtualenv-20.22.0-py3-none-any.whl", hash = "sha256:48fd3b907b5149c5aab7c23d9790bea4cac6bc6b150af8635febc4cfeab1275a"}, + {file = "virtualenv-20.22.0.tar.gz", hash = "sha256:278753c47aaef1a0f14e6db8a4c5e1e040e90aea654d0fc1dc7e0d8a42616cc3"}, ] [package.dependencies] distlib = ">=0.3.6,<1" -filelock = ">=3.4.1,<4" -platformdirs = ">=2.4,<4" +filelock = ">=3.11,<4" +platformdirs = ">=3.2,<4" [package.extras] -docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=22.12)"] -test = ["covdefaults (>=2.2.2)", "coverage (>=7.1)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23)", "pytest (>=7.2.1)", "pytest-env (>=0.8.1)", "pytest-freezegun (>=0.4.2)", "pytest-mock (>=3.10)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)"] +docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=22.12)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.3.1)", "pytest-env (>=0.8.1)", "pytest-freezegun (>=0.4.2)", "pytest-mock (>=3.10)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)"] [[package]] name = 
"wasabi" @@ -3770,21 +3808,21 @@ bracex = ">=2.1.1" [[package]] name = "werkzeug" -version = "2.2.3" +version = "2.3.0" description = "The comprehensive WSGI web application library." category = "dev" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "Werkzeug-2.2.3-py3-none-any.whl", hash = "sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612"}, - {file = "Werkzeug-2.2.3.tar.gz", hash = "sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe"}, + {file = "Werkzeug-2.3.0-py3-none-any.whl", hash = "sha256:340335057f72974d9281dbaf52c8090a9f9a59ba304ae814bf0656e6559c0020"}, + {file = "Werkzeug-2.3.0.tar.gz", hash = "sha256:3b6b46926d052b8ebca97c4dc73c12e47bdd07d57ab0600c039c3155450227bc"}, ] [package.dependencies] MarkupSafe = ">=2.1.1" [package.extras] -watchdog = ["watchdog"] +watchdog = ["watchdog (>=2.3)"] [[package]] name = "zipp" @@ -3805,4 +3843,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "e6751e5126b0605f9ace1fbd2f85f277503d05516d92eeeae1bbf8d18d884ee7" +content-hash = "5e4c5c585943a94c95de49e5005fc1bcd1bb13e2149746384cb155aba1f9fa68" diff --git a/pyproject.toml b/pyproject.toml index 38f99d2326..4c3a941300 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,6 @@ elasticsearch = "~8.5.3" pymongo = "~3.12.0" spacy = "~3.4.1" dacite = "~1.6.0" -flashtext = "~2.7" langid = "~1.1.6" influxdb-client = "~1.34.0" jsonschema = "~4.4.0" diff --git a/robotoff/prediction/category/matcher.py b/robotoff/prediction/category/matcher.py index 9a786aa461..ff392fd1a6 100644 --- a/robotoff/prediction/category/matcher.py +++ b/robotoff/prediction/category/matcher.py @@ -5,14 +5,13 @@ import re from typing import Iterable, Optional -from flashtext import KeywordProcessor - from robotoff import settings from robotoff.products import ProductDataset from robotoff.taxonomy import TaxonomyType, get_taxonomy from robotoff.types import Prediction, PredictionType, ServerType from robotoff.utils import dump_json, get_logger, load_json from robotoff.utils.text import ( + KeywordProcessor, get_lemmatizing_nlp, strip_accents_v1, strip_consecutive_spaces, diff --git a/robotoff/prediction/category/neural/keras_category_classifier_3_0/preprocessing.py b/robotoff/prediction/category/neural/keras_category_classifier_3_0/preprocessing.py index 55a5bd25a1..9b5f7d7507 100644 --- a/robotoff/prediction/category/neural/keras_category_classifier_3_0/preprocessing.py +++ b/robotoff/prediction/category/neural/keras_category_classifier_3_0/preprocessing.py @@ -6,11 +6,11 @@ from typing import Optional import numpy as np -from flashtext import KeywordProcessor from robotoff import settings from robotoff.taxonomy import Taxonomy, fetch_taxonomy from robotoff.types import JSONType +from robotoff.utils.text import KeywordProcessor from .text_utils import fold, get_tag diff --git a/robotoff/prediction/ocr/brand.py b/robotoff/prediction/ocr/brand.py index 49b2599535..7d3da8f395 100644 --- a/robotoff/prediction/ocr/brand.py +++ b/robotoff/prediction/ocr/brand.py @@ -1,13 +1,11 @@ import functools from typing import Iterable, Optional, Union -from flashtext import KeywordProcessor - from robotoff import settings from robotoff.brands import get_brand_blacklist, keep_brand_from_taxonomy from robotoff.types import Prediction, PredictionType from robotoff.utils import get_logger, text_file_iter -from robotoff.utils.text import get_tag +from 
robotoff.utils.text import KeywordProcessor, get_tag from .dataclass import OCRResult, get_match_bounding_box, get_text from .utils import generate_keyword_processor diff --git a/robotoff/prediction/ocr/image_flag.py b/robotoff/prediction/ocr/image_flag.py index 127e2ccea9..a05e558356 100644 --- a/robotoff/prediction/ocr/image_flag.py +++ b/robotoff/prediction/ocr/image_flag.py @@ -1,11 +1,10 @@ import functools from typing import Optional, Union -from flashtext import KeywordProcessor - from robotoff import settings from robotoff.types import Prediction, PredictionType from robotoff.utils import text_file_iter +from robotoff.utils.text import KeywordProcessor from .dataclass import OCRResult, SafeSearchAnnotationLikelihood, get_text diff --git a/robotoff/prediction/ocr/label.py b/robotoff/prediction/ocr/label.py index 15f02fc723..ff81e196a9 100644 --- a/robotoff/prediction/ocr/label.py +++ b/robotoff/prediction/ocr/label.py @@ -2,11 +2,10 @@ import re from typing import Iterable, Optional, Union -from flashtext import KeywordProcessor - from robotoff import settings from robotoff.types import Prediction, PredictionType from robotoff.utils import get_logger, text_file_iter +from robotoff.utils.text import KeywordProcessor from .dataclass import OCRField, OCRRegex, OCRResult, get_match_bounding_box, get_text from .utils import generate_keyword_processor diff --git a/robotoff/prediction/ocr/location.py b/robotoff/prediction/ocr/location.py index 88b9eb66cd..09d9f94d4e 100644 --- a/robotoff/prediction/ocr/location.py +++ b/robotoff/prediction/ocr/location.py @@ -5,13 +5,11 @@ from pathlib import Path from typing import BinaryIO, Iterable, Optional, Union -from flashtext import KeywordProcessor - from robotoff import settings from robotoff.types import Prediction, PredictionType from robotoff.utils import get_logger from robotoff.utils.cache import CachedStore -from robotoff.utils.text import strip_accents_v1 +from robotoff.utils.text import KeywordProcessor, strip_accents_v1 from .dataclass import OCRResult diff --git a/robotoff/prediction/ocr/packager_code.py b/robotoff/prediction/ocr/packager_code.py index d2e046e535..84a088fa0e 100644 --- a/robotoff/prediction/ocr/packager_code.py +++ b/robotoff/prediction/ocr/packager_code.py @@ -1,12 +1,11 @@ import re from typing import Optional, Union -from flashtext import KeywordProcessor - from robotoff import settings from robotoff.types import Prediction, PredictionType from robotoff.utils import text_file_iter from robotoff.utils.cache import CachedStore +from robotoff.utils.text import KeywordProcessor from .dataclass import OCRField, OCRRegex, OCRResult, get_match_bounding_box, get_text from .utils import generate_keyword_processor diff --git a/robotoff/prediction/ocr/utils.py b/robotoff/prediction/ocr/utils.py index d1f91a2ea7..1ea7eaed19 100644 --- a/robotoff/prediction/ocr/utils.py +++ b/robotoff/prediction/ocr/utils.py @@ -1,6 +1,6 @@ from typing import Callable, Iterable, Optional -from flashtext import KeywordProcessor +from robotoff.utils.text import KeywordProcessor def generate_keyword_processor( diff --git a/robotoff/utils/text.py b/robotoff/utils/text/__init__.py similarity index 97% rename from robotoff/utils/text.py rename to robotoff/utils/text/__init__.py index ce5af52271..bfb48e98a1 100644 --- a/robotoff/utils/text.py +++ b/robotoff/utils/text/__init__.py @@ -6,6 +6,7 @@ from robotoff.utils import get_logger +from .flashtext import KeywordProcessor # noqa: F401 from .fold_to_ascii import fold, fold_without_insertion_deletion 
logger = get_logger(__name__) diff --git a/robotoff/utils/text/flashtext.py b/robotoff/utils/text/flashtext.py new file mode 100644 index 0000000000..4e6dee2f12 --- /dev/null +++ b/robotoff/utils/text/flashtext.py @@ -0,0 +1,759 @@ +"""Copied and adapted from https://github.com/vi3k6i5/flashtext (MIT-licensed). + +The flashtext library is no longer maintained, and we needed some bugs fixed +(especially https://github.com/vi3k6i5/flashtext/issues/119). +""" + + +import functools +import io +import os +import string +from pathlib import Path +from typing import Optional, Union + + +class KeywordProcessor: + """KeywordProcessor + + Attributes: + _keyword (str): Used as the key to store keywords in the trie dictionary. + Defaults to '_keyword_' + non_word_boundaries (set(str)): Characters that will determine if the word is continuing. + Defaults to set([A-Za-z0-9_]) + keyword_trie_dict (dict): Trie dict built character by character, used for lookups. + Defaults to empty dictionary + case_sensitive (bool): whether the search should be case sensitive. + Defaults to False + + Examples: + >>> # import module + >>> from robotoff.utils.text import KeywordProcessor + >>> # Create an object of KeywordProcessor + >>> keyword_processor = KeywordProcessor() + >>> # add keywords + >>> keyword_names = ['NY', 'new-york', 'SF'] + >>> clean_names = ['new york', 'new york', 'san francisco'] + >>> for keyword_name, clean_name in zip(keyword_names, clean_names): + >>> keyword_processor.add_keyword(keyword_name, clean_name) + >>> keywords_found = keyword_processor.extract_keywords('I love SF and NY. new-york is the best.') + >>> keywords_found + >>> ['san francisco', 'new york', 'new york'] + + Note: + * loosely based on the `Aho-Corasick algorithm <https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm>`_. + * the idea came from a Stack Overflow question about fast keyword replacement. + """ + + def __init__(self, case_sensitive: bool = False): + """ + Args: + case_sensitive (bool): whether keyword search should be case sensitive. + Defaults to False + """ + self._keyword = "_keyword_" + self._white_space_chars = set([".", "\t", "\n", "\a", " ", ","]) + self.non_word_boundaries = set(string.digits + string.ascii_letters + "_") + self.keyword_trie_dict = dict()  # type: ignore + self.case_sensitive = case_sensitive + self._terms_in_trie = 0 + + def __len__(self) -> int: + """Number of terms present in the keyword_trie_dict + + Returns: + length : int + Count of the number of distinct terms in the trie dictionary. + + """ + return self._terms_in_trie + + def __contains__(self, word: str) -> bool: + """Check if a word is present in the keyword_trie_dict + + Args: + word : string + word that you want to check + + Returns: + status : bool + True if the word is present as-is in keyword_trie_dict, else False + + Examples: + >>> keyword_processor.add_keyword('Big Apple') + >>> 'Big Apple' in keyword_processor + >>> # True + + """ + if not self.case_sensitive: + word = word.lower() + current_dict = self.keyword_trie_dict + len_covered = 0 + for char in word: + if char in current_dict: + current_dict = current_dict[char] + len_covered += 1 + else: + break + return self._keyword in current_dict and len_covered == len(word)
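+ + # Note: the trie is a plain nested dict keyed by single characters; the reserved + # `_keyword_` key marks a terminal node and holds the clean name. For example, + # add_keyword('NY', 'new york') stores (lowercased, since case_sensitive defaults + # to False): {'n': {'y': {'_keyword_': 'new york'}}}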
+ + def __getitem__(self, word: str) -> Optional[str]: + """If the word is present in keyword_trie_dict, return the clean name for it. + + Args: + word : string + word that you want to check + + Returns: + keyword : string + If the word is present as-is in keyword_trie_dict, we return the keyword mapped to it. + + Examples: + >>> keyword_processor.add_keyword('Big Apple', 'New York') + >>> keyword_processor['Big Apple'] + >>> # New York + """ + if not self.case_sensitive: + word = word.lower() + current_dict = self.keyword_trie_dict + len_covered = 0 + for char in word: + if char in current_dict: + current_dict = current_dict[char] + len_covered += 1 + else: + break + if self._keyword in current_dict and len_covered == len(word): + return current_dict[self._keyword] + + return None + + def __setitem__(self, keyword: str, clean_name: Optional[str] = None) -> bool: + """Add a keyword to the dictionary; + pass the keyword and the clean name it maps to. + + Args: + keyword : string + keyword that you want to identify + + clean_name : string + clean term for that keyword that you would want to get back in return or replace + if not provided, the keyword will be used as the clean name as well. + + Examples: + >>> keyword_processor['Big Apple'] = 'New York' + """ + status = False + if not clean_name and keyword: + clean_name = keyword + + if keyword and clean_name: + if not self.case_sensitive: + keyword = keyword.lower() + current_dict = self.keyword_trie_dict + for letter in keyword: + current_dict = current_dict.setdefault(letter, {}) + if self._keyword not in current_dict: + status = True + self._terms_in_trie += 1 + current_dict[self._keyword] = clean_name + return status + + def __delitem__(self, keyword: str) -> bool: + """Remove a keyword from the dictionary; + pass the keyword you want to remove. + + Args: + keyword : string + keyword that you want to remove if it's present + + Examples: + >>> keyword_processor.add_keyword('Big Apple') + >>> del keyword_processor['Big Apple'] + """ + status = False + if keyword: + if not self.case_sensitive: + keyword = keyword.lower() + current_dict = self.keyword_trie_dict + character_trie_list = [] + for letter in keyword: + if letter in current_dict: + character_trie_list.append((letter, current_dict)) + current_dict = current_dict[letter] + else: + # if the character is not found, break out of the loop + current_dict = None  # type: ignore + break + # remove the characters from the trie dict if there are no other keywords with them + if current_dict and self._keyword in current_dict: + # we found a complete match for the input keyword. + character_trie_list.append((self._keyword, current_dict)) + character_trie_list.reverse() + + for key_to_remove, dict_pointer in character_trie_list: + if len(dict_pointer.keys()) == 1: + dict_pointer.pop(key_to_remove) + else: + # more than one key means more than one path; + # delete the path that is no longer required and keep the other + dict_pointer.pop(key_to_remove) + break + # successfully removed keyword + status = True + self._terms_in_trie -= 1 + return status + + def __iter__(self): + """Disabled iteration as get_all_keywords() is the right way to iterate""" + raise NotImplementedError("Please use get_all_keywords() instead") + + def set_non_word_boundaries(self, non_word_boundaries: set[str]) -> None: + """Set the characters that will be considered part of a word. + + Args: + non_word_boundaries (set(str)): + Set of characters that will be considered as part of a word. + + """ + self.non_word_boundaries = non_word_boundaries + + def add_non_word_boundary(self, character: str) -> None: + """Add a character that will be considered part of a word. + + Args: + character (char): + Character that will be considered as part of a word. + + """ + self.non_word_boundaries.add(character)
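+ + # For instance, making '-' part of words prevents matches ending at hyphens: with + # the default boundaries the keyword 'beta' would match inside 'beta-carotene' + # (illustrative keywords), but not after this call: + # processor = KeywordProcessor() + # processor.add_non_word_boundary('-')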
+ + """ + self.non_word_boundaries.add(character) + + def add_keyword(self, keyword: str, clean_name: Optional[str] = None) -> bool: + """To add one or more keywords to the dictionary + pass the keyword and the clean name it maps to. + + Args: + keyword : string + keyword that you want to identify + + clean_name : string + clean term for that keyword that you would want to get back in return or replace + if not provided, keyword will be used as the clean name also. + + Returns: + status : bool + The return value. True for success, False otherwise. + + Examples: + >>> keyword_processor.add_keyword('Big Apple', 'New York') + >>> # This case 'Big Apple' will return 'New York' + >>> # OR + >>> keyword_processor.add_keyword('Big Apple') + >>> # This case 'Big Apple' will return 'Big Apple' + """ + return self.__setitem__(keyword, clean_name) + + def remove_keyword(self, keyword: str) -> bool: + """To remove one or more keywords from the dictionary + pass the keyword and the clean name it maps to. + + Args: + keyword : string + keyword that you want to remove if it's present + + Returns: + status : bool + The return value. True for success, False otherwise. + + Examples: + >>> keyword_processor.add_keyword('Big Apple') + >>> keyword_processor.remove_keyword('Big Apple') + >>> # Returns True + >>> # This case 'Big Apple' will no longer be a recognized keyword + >>> keyword_processor.remove_keyword('Big Apple') + >>> # Returns False + + """ + return self.__delitem__(keyword) + + def get_keyword(self, word: str) -> Optional[str]: + """if word is present in keyword_trie_dict return the clean name for it. + + Args: + word : string + word that you want to check + + Returns: + keyword : string + If word is present as it is in keyword_trie_dict then we return keyword mapped to it. + + Examples: + >>> keyword_processor.add_keyword('Big Apple', 'New York') + >>> keyword_processor.get('Big Apple') + >>> # New York + """ + return self.__getitem__(word) + + def add_keyword_from_file( + self, keyword_file: Union[Path, str], encoding: str = "utf-8" + ) -> None: + """To add keywords from a file + + Args: + keyword_file : path to keywords file + encoding : specify the encoding of the file + + Examples: + keywords file format can be like: + + >>> # Option 1: keywords.txt content + >>> # java_2e=>java + >>> # java programing=>java + >>> # product management=>product management + >>> # product management techniques=>product management + + >>> # Option 2: keywords.txt content + >>> # java + >>> # python + >>> # c++ + + >>> keyword_processor.add_keyword_from_file('keywords.txt') + + Raises: + IOError: If `keyword_file` path is not valid + + """ + if not os.path.isfile(keyword_file): + raise IOError("Invalid file path {}".format(keyword_file)) + with io.open(keyword_file, encoding=encoding) as f: + for line in f: + if "=>" in line: + keyword, clean_name = line.split("=>") + self.add_keyword(keyword, clean_name.strip()) + else: + keyword = line.strip() + self.add_keyword(keyword) + + def add_keywords_from_dict(self, keyword_dict: dict[str, str]) -> None: + """To add keywords from a dictionary + + Args: + keyword_dict (dict): A dictionary with `str` key and (list `str`) as value + + Examples: + >>> keyword_dict = { + "java": ["java_2e", "java programing"], + "product management": ["PM", "product manager"] + } + >>> keyword_processor.add_keywords_from_dict(keyword_dict) + + Raises: + AttributeError: If value for a key in `keyword_dict` is not a list. 
+
+        """
+        for clean_name, keywords in keyword_dict.items():
+            if not isinstance(keywords, list):
+                raise AttributeError(
+                    "Value of key {} should be a list".format(clean_name)
+                )
+
+            for keyword in keywords:
+                self.add_keyword(keyword, clean_name)
+
+    def remove_keywords_from_dict(self, keyword_dict: dict[str, list[str]]) -> None:
+        """Remove keywords from a dictionary.
+
+        Args:
+            keyword_dict (dict): A dictionary with `str` keys and `list[str]` values
+
+        Examples:
+            >>> keyword_dict = {
+                    "java": ["java_2e", "java programing"],
+                    "product management": ["PM", "product manager"]
+                }
+            >>> keyword_processor.remove_keywords_from_dict(keyword_dict)
+
+        Raises:
+            AttributeError: If value for a key in `keyword_dict` is not a list.
+
+        """
+        for clean_name, keywords in keyword_dict.items():
+            if not isinstance(keywords, list):
+                raise AttributeError(
+                    "Value of key {} should be a list".format(clean_name)
+                )
+
+            for keyword in keywords:
+                self.remove_keyword(keyword)
+
+    def add_keywords_from_list(self, keyword_list: list[str]) -> None:
+        """Add keywords from a list.
+
+        Args:
+            keyword_list (list(str)): List of keywords to add
+
+        Examples:
+            >>> keyword_processor.add_keywords_from_list(["java", "python"])
+
+        Raises:
+            AttributeError: If `keyword_list` is not a list.
+
+        """
+        if not isinstance(keyword_list, list):
+            raise AttributeError("keyword_list should be a list")
+
+        for keyword in keyword_list:
+            self.add_keyword(keyword)
+
+    def remove_keywords_from_list(self, keyword_list: list[str]) -> None:
+        """Remove the keywords present in the list.
+
+        Args:
+            keyword_list (list(str)): List of keywords to remove
+
+        Examples:
+            >>> keyword_processor.remove_keywords_from_list(["java", "python"])
+
+        Raises:
+            AttributeError: If `keyword_list` is not a list.
+
+        """
+        if not isinstance(keyword_list, list):
+            raise AttributeError("keyword_list should be a list")
+
+        for keyword in keyword_list:
+            self.remove_keyword(keyword)
+
+    def get_all_keywords(
+        self, term_so_far: str = "", current_dict: Optional[dict] = None
+    ) -> dict:
+        """Recursively build a dictionary of the keywords present in the trie,
+        mapped to the clean names associated with those keywords.
+
+        Args:
+            term_so_far : string
+                term built so far by adding all previous characters
+            current_dict : dict
+                current recursive position in the dictionary
+
+        Returns:
+            terms_present : dict
+                A map where each key is a term present in keyword_trie_dict
+                and the value is the clean name mapped to it.
+
+        Examples:
+            >>> keyword_processor = KeywordProcessor()
+            >>> keyword_processor.add_keyword('j2ee', 'Java')
+            >>> keyword_processor.add_keyword('Python', 'Python')
+            >>> keyword_processor.get_all_keywords()
+            >>> {'j2ee': 'Java', 'python': 'Python'}
+            >>> # NOTE: if case_insensitive, all keys are lowercased.
+        """
+        terms_present = {}
+        if not term_so_far:
+            term_so_far = ""
+        if current_dict is None:
+            current_dict = self.keyword_trie_dict
+        for key in current_dict:
+            if key == self._keyword:
+                terms_present[term_so_far] = current_dict[key]
+            else:
+                sub_values = self.get_all_keywords(term_so_far + key, current_dict[key])
+                for sub_key in sub_values:
+                    terms_present[sub_key] = sub_values[sub_key]
+        return terms_present
+
+    def extract_keywords(
+        self, sentence: str, span_info: bool = False, max_cost: int = 0
+    ) -> list[Union[str, tuple[str, int, int]]]:
+        """Search the string for all keywords present in the corpus.
+        Keywords found are added to the list `keywords_extracted` and returned.
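+        Only the longest match starting at a given position is returned: if
+        both 'Big Apple' and 'Big Apple Pie' are keywords, a sentence
+        containing 'Big Apple Pie' only yields the clean name of the longer
+        keyword (illustrative example).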
+
+        Args:
+            sentence (str): Line of text where we will search for keywords
+            span_info (bool): True to also return the (start, end) boundaries of each extraction
+            max_cost (int): maximum Levenshtein distance to accept when extracting keywords
+
+        Returns:
+            keywords_extracted (list(str)): List of terms/keywords found in the sentence that match our corpus
+
+        Examples:
+            >>> from robotoff.utils.text import KeywordProcessor
+            >>> keyword_processor = KeywordProcessor()
+            >>> keyword_processor.add_keyword('Big Apple', 'New York')
+            >>> keyword_processor.add_keyword('Bay Area')
+            >>> keywords_found = keyword_processor.extract_keywords('I love Big Apple and Bay Area.')
+            >>> keywords_found
+            >>> ['New York', 'Bay Area']
+            >>> keywords_found = keyword_processor.extract_keywords('I love Big Aple and Baay Area.', max_cost=1)
+            >>> keywords_found
+            >>> ['New York', 'Bay Area']
+        """
+        keywords_extracted: list[Union[str, tuple[str, int, int]]] = []
+        if not sentence:
+            # if sentence is empty or None, just return an empty list
+            return keywords_extracted
+
+        index_mapping = get_index_mapping(sentence, self.case_sensitive)
+        get_span_indices = functools.partial(
+            _get_span_indices, index_mapping=index_mapping
+        )
+        if not self.case_sensitive:
+            sentence = sentence.lower()
+        current_dict = self.keyword_trie_dict
+        sequence_start_pos = 0
+        sequence_end_pos = 0
+        reset_current_dict = False
+        idx = 0
+        sentence_len = len(sentence)
+        curr_cost = max_cost
+        while idx < sentence_len:
+            char = sentence[idx]
+            # when we reach a character that might denote a word end
+            if char not in self.non_word_boundaries:
+
+                # if end is present in current_dict
+                if self._keyword in current_dict or char in current_dict:
+                    # update longest sequence found
+                    sequence_found = None
+                    longest_sequence_found = None
+                    is_longer_seq_found = False
+                    if self._keyword in current_dict:
+                        sequence_found = current_dict[self._keyword]
+                        longest_sequence_found = current_dict[self._keyword]
+                        sequence_end_pos = idx
+
+                    # re-look for the longest sequence from this position
+                    if char in current_dict:
+                        current_dict_continued = current_dict[char]
+
+                        idy = idx + 1
+                        while idy < sentence_len:
+                            inner_char = sentence[idy]
+                            if (
+                                inner_char not in self.non_word_boundaries
+                                and self._keyword in current_dict_continued
+                            ):
+                                # update longest sequence found
+                                longest_sequence_found = current_dict_continued[
+                                    self._keyword
+                                ]
+                                sequence_end_pos = idy
+                                is_longer_seq_found = True
+                            if inner_char in current_dict_continued:
+                                current_dict_continued = current_dict_continued[
+                                    inner_char
+                                ]
+                            elif curr_cost > 0:
+                                next_word = self.get_next_word(sentence[idy:])
+                                current_dict_continued, cost, _ = next(
+                                    self.levensthein(
+                                        next_word,
+                                        max_cost=curr_cost,
+                                        start_node=current_dict_continued,
+                                    ),
+                                    ({}, 0, 0),
+                                )  # current_dict_continued defaults to an empty dict, so the next iteration goes to a `break`
+                                curr_cost -= cost
+                                idy += len(next_word) - 1
+                                if not current_dict_continued:
+                                    break
+                            else:
+                                break
+                            idy += 1
+                        else:
+                            # end of sentence reached.
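+                            # (note: this is the `while ... else` construct;
+                            # the block runs only when the loop was not
+                            # exited via `break`)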
+                            if self._keyword in current_dict_continued:
+                                # update longest sequence found
+                                longest_sequence_found = current_dict_continued[
+                                    self._keyword
+                                ]
+                                sequence_end_pos = idy
+                                is_longer_seq_found = True
+                    if is_longer_seq_found:
+                        idx = sequence_end_pos
+                    current_dict = self.keyword_trie_dict
+                    if longest_sequence_found:
+                        keywords_extracted.append(
+                            (  # type: ignore
+                                longest_sequence_found,
+                                *get_span_indices(sequence_start_pos, idx),
+                            )
+                        )
+                        curr_cost = max_cost
+                    reset_current_dict = True
+                else:
+                    # we reset current_dict
+                    current_dict = self.keyword_trie_dict
+                    reset_current_dict = True
+            elif char in current_dict:
+                # we can continue from this char
+                current_dict = current_dict[char]
+            elif curr_cost > 0:
+                next_word = self.get_next_word(sentence[idx:])
+                current_dict, cost, _ = next(
+                    self.levensthein(
+                        next_word, max_cost=curr_cost, start_node=current_dict
+                    ),
+                    (self.keyword_trie_dict, 0, 0),
+                )
+                curr_cost -= cost
+                idx += len(next_word) - 1
+            else:
+                # we reset current_dict
+                current_dict = self.keyword_trie_dict
+                reset_current_dict = True
+                # skip to end of word
+                idy = idx + 1
+                while idy < sentence_len:
+                    char = sentence[idy]
+                    if char not in self.non_word_boundaries:
+                        break
+                    idy += 1
+                idx = idy
+            # if we are at the end of the sentence and have a sequence discovered
+            if idx + 1 >= sentence_len:
+                if self._keyword in current_dict:
+                    sequence_found = current_dict[self._keyword]
+                    keywords_extracted.append(
+                        (
+                            sequence_found,
+                            *get_span_indices(sequence_start_pos, sentence_len),
+                        )
+                    )
+            idx += 1
+            if reset_current_dict:
+                reset_current_dict = False
+                sequence_start_pos = idx
+        if span_info:
+            return keywords_extracted
+        return [value[0] for value in keywords_extracted]
+
+    def get_next_word(self, sentence: str) -> str:
+        """
+        Retrieve the next word in the sentence:
+        iterate over the string until finding the first char not in non_word_boundaries.
+
+        Args:
+            sentence (str): Line of text where we will look for the next word
+
+        Returns:
+            next_word (str): The next word in the sentence
+
+        Examples:
+            >>> from robotoff.utils.text import KeywordProcessor
+            >>> keyword_processor = KeywordProcessor()
+            >>> keyword_processor.add_keyword('Big Apple')
+            >>> keyword_processor.get_next_word('Big Apple')
+            >>> 'Big'
+        """
+        next_word = ""
+        for char in sentence:
+            if char not in self.non_word_boundaries:
+                break
+            next_word += char
+        return next_word
+
+    def levensthein(
+        self, word: str, max_cost: int = 2, start_node: Optional[dict] = None
+    ):
+        """
+        Retrieve the nodes where there is a fuzzy match,
+        via Levenshtein distance, and with respect to max_cost
+
+        Args:
+            word (str): word to find a fuzzy match for
+            max_cost (int): maximum Levenshtein distance when performing the fuzzy match
+            start_node (dict): Trie node from which the search is performed
+
+        Yields:
+            node, cost, depth (tuple): A tuple containing the final node,
+                the cost (i.e. the distance), and the depth in the trie
+
+        Examples:
+            >>> from robotoff.utils.text import KeywordProcessor
+            >>> keyword_processor = KeywordProcessor(case_sensitive=True)
+            >>> keyword_processor.add_keyword('Marie', 'Mary')
+            >>> next(keyword_processor.levensthein('Maria', max_cost=1))
+            >>> ({'_keyword_': 'Mary'}, 1, 5)
+            ...
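+            >>> # in the example above: cost=1 for the single substitution
+            >>> # ('a' vs 'e'), and depth=5 = len('Marie'), the number of
+            >>> # trie levels traversed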
+            >>> keyword_processor = KeywordProcessor(case_sensitive=True)
+            >>> keyword_processor.add_keyword('Marie Blanc', 'Mary')
+            >>> next(keyword_processor.levensthein('Mari', max_cost=1))
+            >>> ({' ': {'B': {'l': {'a': {'n': {'c': {'_keyword_': 'Mary'}}}}}}}, 1, 5)
+        """
+        start_node = start_node or self.keyword_trie_dict
+        rows = range(len(word) + 1)
+
+        for char, node in start_node.items():
+            yield from self._levenshtein_rec(char, node, word, rows, max_cost, depth=1)
+
+    def _levenshtein_rec(self, char, node, word, rows, max_cost, depth=0):
+        n_columns = len(word) + 1
+        new_rows = [rows[0] + 1]
+        cost = 0
+
+        for col in range(1, n_columns):
+            insert_cost = new_rows[col - 1] + 1
+            delete_cost = rows[col] + 1
+            replace_cost = rows[col - 1] + int(word[col - 1] != char)
+            cost = min((insert_cost, delete_cost, replace_cost))
+            new_rows.append(cost)
+
+        stop_crit = isinstance(node, dict) and node.keys() & (
+            self._white_space_chars | {self._keyword}
+        )
+        if new_rows[-1] <= max_cost and stop_crit:
+            yield node, cost, depth
+
+        elif isinstance(node, dict) and min(new_rows) <= max_cost:
+            for new_char, new_node in node.items():
+                yield from self._levenshtein_rec(
+                    new_char, new_node, word, new_rows, max_cost, depth=depth + 1
+                )
+
+
+def _get_span_indices(
+    start_idx: int, end_idx: int, index_mapping: Optional[list[int]] = None
+) -> tuple[int, int]:
+    """Return the span indices (start_index, end_index), taking into account
+    the index shift due to lowercasing. See `get_index_mapping` for further
+    explanations.
+
+    :param start_idx: start index of the match
+    :param end_idx: end index of the match
+    :param index_mapping: optional index mapping, defaults to None
+    :return: a (start_idx, end_idx) tuple, possibly shifted if `index_mapping`
+        is not None
+    """
+    if index_mapping is None:
+        return start_idx, end_idx
+    return index_mapping[start_idx], index_mapping[end_idx - 1] + 1
+
+
+# LATIN CAPITAL LETTER I WITH DOT ABOVE is the only letter that changes length
+# when lowercased: see http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt
+LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE = "İ"
+
+
+def get_index_mapping(sentence: str, case_sensitive: bool) -> Optional[list[int]]:
+    """Get the character index mapping (a list of indices of the same length
+    as the lowercased version of `sentence`, or None).
+
+    When lowercasing a string, the string changes length if it contains LATIN
+    CAPITAL LETTER I WITH DOT ABOVE (`İ`): the length of the lowercased
+    version of this letter is 2 (instead of 1).
+    If `case_sensitive=True` or if there is no `İ` in the string, this function
+    returns None: we don't need to account for character index shifts during
+    keyword extraction.
+    Otherwise, we return a list of indices of the same length as the lowercased
+    version of `sentence` that gives, for each position, the character index in
+    the original sentence.
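+    For instance (illustrative): for the sentence "İd", the lowercased form
+    "i̇d" has 3 characters, and the returned mapping is [0, 0, 1]: the first
+    two lowercased characters both map back to the `İ` at index 0.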
+ + :param sentence: the original non-lowercased sentence + :param case_sensitive: whether the keyword extraction is case sensitive + """ + if case_sensitive or LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE not in sentence: + return None + offsets = [] + for idx, char in enumerate(sentence): + if char == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE: + offsets.append(idx) + offsets.append(idx) + return offsets diff --git a/robotoff/utils/fold_to_ascii.py b/robotoff/utils/text/fold_to_ascii.py similarity index 100% rename from robotoff/utils/fold_to_ascii.py rename to robotoff/utils/text/fold_to_ascii.py diff --git a/tests/unit/data/flashtext/keyword_extractor_test_cases.json b/tests/unit/data/flashtext/keyword_extractor_test_cases.json new file mode 100644 index 0000000000..c776e92c94 --- /dev/null +++ b/tests/unit/data/flashtext/keyword_extractor_test_cases.json @@ -0,0 +1 @@ +[{"sentence":"I like python","keyword_dict":{"Python":["python"]},"explanation":"Keyword at the end of the sentence.","keywords":["Python"],"keywords_case_sensitive":["Python"]},{"sentence":"I like python","keyword_dict":{"Pythonizer":["pythonizer"]},"explanation":"Incomplete keyword at the end of the sentence.","keywords":[],"keywords_case_sensitive":[]},{"sentence":"python I like","keyword_dict":{"Python":["python"]},"explanation":"Keyword at the beginning of the sentence.","keywords":["Python"],"keywords_case_sensitive":["Python"]},{"sentence":"I like python also","keyword_dict":{"Python":["python"]},"explanation":"Keyword before the end of the sentence.","keywords":["Python"],"keywords_case_sensitive":["Python"]},{"sentence":"I like python java","keyword_dict":{"Python":["python"],"Java":["java"]},"explanation":"Multiple keywords in the end of the sentence.","keywords":["Python","Java"],"keywords_case_sensitive":["Python","Java"]},{"sentence":"I like python and java","keyword_dict":{"Python":["python"],"Java":["java"]},"explanation":"Multiple keywords in the sentence with other word in between.","keywords":["Python","Java"],"keywords_case_sensitive":["Python","Java"]},{"sentence":"python","keyword_dict":{"Python":["python"]},"explanation":"Single keyword in the sentence.","keywords":["Python"],"keywords_case_sensitive":["Python"]},{"sentence":" python","keyword_dict":{"Python":["python"]},"explanation":"Single keyword in the sentence with space prefix.","keywords":["Python"],"keywords_case_sensitive":["Python"]},{"sentence":"I like r","keyword_dict":{"R":["r"]},"explanation":"Single char keyword at the end of the sentence.","keywords":["R"],"keywords_case_sensitive":["R"]},{"sentence":"r I like","keyword_dict":{"R":["r"]},"explanation":"Single char keyword at the beginning of the sentence.","keywords":["R"],"keywords_case_sensitive":["R"]},{"sentence":"I like R also","keyword_dict":{"R":["r"]},"explanation":"Single char keyword before the end of the sentence.","keywords":["R"],"keywords_case_sensitive":[]},{"sentence":"I like R java","keyword_dict":{"R":["r"],"Java":["java"]},"explanation":"Multiple keywords in the end of the sentence.","keywords":["R","Java"],"keywords_case_sensitive":["Java"]},{"sentence":"I like R and java","keyword_dict":{"R":["R"],"Java":["java"]},"explanation":"Multiple keywords in the sentence with other word in between.","keywords":["R","Java"],"keywords_case_sensitive":["R","Java"]},{"sentence":"R","keyword_dict":{"R":["r"]},"explanation":"Single character keyword in the sentence.","keywords":["R"],"keywords_case_sensitive":[]},{"sentence":" R","keyword_dict":{"R":["R"]},"explanation":"Single character 
keyword in the sentence with space prefix.","keywords":["R"],"keywords_case_sensitive":["R"]},{"sentence":"I like distributed super computing","keyword_dict":{"Distributed Super Computing":["distributed super computing"]},"explanation":"Multi word Keyword at the end of the sentence.","keywords":["Distributed Super Computing"],"keywords_case_sensitive":["Distributed Super Computing"]},{"sentence":"distributed super computing I like","keyword_dict":{"Distributed Super Computing":["distributed super computing"]},"explanation":"Multi word Keyword at the beginning of the sentence.","keywords":["Distributed Super Computing"],"keywords_case_sensitive":["Distributed Super Computing"]},{"sentence":"I like distributed super computing also","keyword_dict":{"Distributed Super Computing":["distributed super computing"]},"explanation":"Multi word Keyword before the end of the sentence.","keywords":["Distributed Super Computing"],"keywords_case_sensitive":["Distributed Super Computing"]},{"sentence":"I like distributed super computing java","keyword_dict":{"Distributed Super Computing":["distributed super computing"],"Java":["java"]},"explanation":"Multi word Keyword at the end of the sentence.","keywords":["Distributed Super Computing","Java"],"keywords_case_sensitive":["Distributed Super Computing","Java"]},{"sentence":"I like distributed super computing java programing","keyword_dict":{"Distributed Super Computing":["distributed super computing"],"Java":["java programing"]},"explanation":"Multiple Multi word Keyword at the end of the sentence.","keywords":["Distributed Super Computing","Java"],"keywords_case_sensitive":["Distributed Super Computing","Java"]},{"sentence":"I like distributed super computing and java","keyword_dict":{"Distributed Super Computing":["distributed super computing"],"Java":["java"]},"explanation":"Multiple keywords in the sentence with other word in between.","keywords":["Distributed Super Computing","Java"],"keywords_case_sensitive":["Distributed Super Computing","Java"]},{"sentence":"distributed super computing","keyword_dict":{"Distributed Super Computing":["distributed super computing"]},"explanation":"Single Multi word Keyword in the sentence.","keywords":["Distributed Super Computing"],"keywords_case_sensitive":["Distributed Super Computing"]},{"sentence":" distributed super computing","keyword_dict":{"Distributed Super Computing":["distributed super computing"]},"explanation":"Single Multi word Keyword in the sentence with space prefix.","keywords":["Distributed Super Computing"],"keywords_case_sensitive":["Distributed Super Computing"]},{"sentence":"distributed super computing distributed super computing","keyword_dict":{"Distributed Super Computing":["distributed super computing"]},"explanation":"Multi word Keyword twice","keywords":["Distributed Super Computing","Distributed Super Computing"],"keywords_case_sensitive":["Distributed Super Computing","Distributed Super Computing"]},{"sentence":"distributed super distributed super computing","keyword_dict":{"Distributed Super Computing":["distributed super computing"]},"explanation":"Multi word Keyword partial then complete.","keywords":["Distributed Super Computing"],"keywords_case_sensitive":["Distributed Super Computing"]},{"sentence":"distributed super distributed super computing java","keyword_dict":{"Distributed Super Computing":["distributed super computing"],"Java":["java"]},"explanation":"","keywords":["Distributed Super Computing","Java"],"keywords_case_sensitive":["Distributed Super 
Computing","Java"]},{"sentence":"distributed super distributed super computing institute","keyword_dict":{"Distributed Super Computing":["distributed super computing"],"Distributed Super Computing Institute":["distributed super computing institute"]},"explanation":"","keywords":["Distributed Super Computing Institute"],"keywords_case_sensitive":["Distributed Super Computing Institute"]},{"sentence":"distributed super distributed super computing insti","keyword_dict":{"Distributed Super Computing":["distributed super computing"],"Distributed Super Computing Institute":["distributed super computing institute"]},"explanation":"","keywords":["Distributed Super Computing"],"keywords_case_sensitive":["Distributed Super Computing"]},{"sentence":"distributed super distributed super computing insti java","keyword_dict":{"Distributed Super Computing":["distributed super computing"],"Distributed Super Computing Institute":["distributed super computing institute"],"Java":["java"]},"explanation":"","keywords":["Distributed Super Computing","Java"],"keywords_case_sensitive":["Distributed Super Computing","Java"]},{"sentence":"distributed super distributed super computing institute java","keyword_dict":{"Distributed Super Computing":["distributed super computing"],"Distributed Super Computing Institute":["distributed super computing institute"],"Java":["java"]},"explanation":"","keywords":["Distributed Super Computing Institute","Java"],"keywords_case_sensitive":["Distributed Super Computing Institute","Java"]},{"sentence":"distributed super distributed super computing institute and java","keyword_dict":{"Distributed Super Computing":["distributed super computing"],"Distributed Super Computing Institute":["distributed super computing institute"],"Java":["java"]},"explanation":"","keywords":["Distributed Super Computing Institute","Java"],"keywords_case_sensitive":["Distributed Super Computing Institute","Java"]},{"sentence":"distributed super distributed super computing insti r","keyword_dict":{"Distributed Super Computing":["distributed super computing"],"Distributed Super Computing Institute":["distributed super computing institute"],"R":["r"]},"explanation":"","keywords":["Distributed Super Computing","R"],"keywords_case_sensitive":["Distributed Super Computing","R"]},{"sentence":"distributed super distributed super computing institute r","keyword_dict":{"Distributed Super Computing":["distributed super computing"],"Distributed Super Computing Institute":["distributed super computing institute"],"R":["r"]},"explanation":"","keywords":["Distributed Super Computing Institute","R"],"keywords_case_sensitive":["Distributed Super Computing Institute","R"]},{"sentence":"distributed super distributed super computing institute and r","keyword_dict":{"Distributed Super Computing":["distributed super computing"],"Distributed Super Computing Institute":["distributed super computing institute"],"R":["r"]},"explanation":"","keywords":["Distributed Super Computing Institute","R"],"keywords_case_sensitive":["Distributed Super Computing Institute","R"]},{"sentence":"distributed pronoun game","keyword_dict":{"Distributed Programing":["distributed programing"],"Pronoun Game":["pronoun game"]},"explanation":"","keywords":["Pronoun Game"],"keywords_case_sensitive":["Pronoun Game"]},{"sentence":"distributed super computer game","keyword_dict":{"Distributed Super Computer":["distributed super computer"],"Computer Game":["computer game"]},"explanation":"","keywords":["Distributed Super 
Computer"],"keywords_case_sensitive":["Distributed Super Computer"]},{"sentence":"distributed super computer game","keyword_dict":{"Distributed Super Company":["distributed super company"],"Computer Game":["computer game"]},"explanation":"","keywords":["Computer Game"],"keywords_case_sensitive":["Computer Game"]},{"sentence":"distributed super computer game","keyword_dict":{"Distributed Super Company":["distributed super company"],"Super Computer":["super computer"],"Computer Game":["computer game"]},"explanation":"","keywords":["Super Computer"],"keywords_case_sensitive":["Super Computer"]},{"sentence":"distributed super compute game","keyword_dict":{"Distributed Super Company":["distributed super company"],"Super Computer":["super computer"],"Computer Game":["computer game"]},"explanation":"","keywords":[],"keywords_case_sensitive":[]},{"sentence":"computer game development","keyword_dict":{"Computer Game":["computer game"],"Computer Game Development":["computer game development"]},"explanation":"","keywords":["Computer Game Development"],"keywords_case_sensitive":["Computer Game Development"]},{"sentence":"computer game development","keyword_dict":{"Computer Gaming":["computer gaming"],"Computer Game Development":["computer game development"]},"explanation":"","keywords":["Computer Game Development"],"keywords_case_sensitive":["Computer Game Development"]},{"sentence":"I like .net","keyword_dict":{".NET":[".net"]},"explanation":"keyword with special character","keywords":[".NET"],"keywords_case_sensitive":[".NET"]},{"sentence":"I like c++","keyword_dict":{"Cpp":["c++"]},"explanation":"keyword with special character","keywords":["Cpp"],"keywords_case_sensitive":["Cpp"]},{"sentence":"python.","keyword_dict":{"Python":["python."]},"explanation":"Ending with special character","keywords":["Python"],"keywords_case_sensitive":["Python"]},{"sentence":"python ","keyword_dict":{"Python":["python"]},"explanation":"Ending with special character","keywords":["Python"],"keywords_case_sensitive":["Python"]},{"sentence":"i like python programming","keyword_dict":{"Python":["python prog"]},"explanation":"Negative test case","keywords":[],"keywords_case_sensitive":[]},{"sentence":"distributed super distributed super computing institute java","keyword_dict":{"Java":["java"],"Distributed Super Computing Institutes":["distributed super computing institutes"],"Institute":["institute"],"Distributed Super Computing":["distributed super computing"]},"explanation":"Negative test case","keywords":["Distributed Super Computing","Institute","Java"],"keywords_case_sensitive":["Distributed Super Computing","Institute","Java"]},{"sentence":"targets relative to targets of the IRE1/XBP1s and PERK arms of the UPR","keyword_dict":{"IRE1":["IRE1"],"XBP1s":["XBP1s"],"UPR":["upr"]},"explanation":"","keywords":["IRE1","XBP1s","UPR"],"keywords_case_sensitive":["IRE1","XBP1s"]},{"sentence":"spring framework","keyword_dict":{"spring framework":["spring","spring framework"],"framework":["framework"]},"explanation":"","keywords":["spring framework"],"keywords_case_sensitive":["spring framework"]}] \ No newline at end of file diff --git a/tests/unit/data/flashtext/keywords_format_one.txt b/tests/unit/data/flashtext/keywords_format_one.txt new file mode 100644 index 0000000000..17b6a16f4e --- /dev/null +++ b/tests/unit/data/flashtext/keywords_format_one.txt @@ -0,0 +1,4 @@ +java_2e=>java +java programing=>java +product management=>product management +product management techniques=>product management \ No newline at end of file diff 
--git a/tests/unit/data/flashtext/keywords_format_two.txt b/tests/unit/data/flashtext/keywords_format_two.txt new file mode 100644 index 0000000000..352be1d153 --- /dev/null +++ b/tests/unit/data/flashtext/keywords_format_two.txt @@ -0,0 +1,2 @@ +java +product management \ No newline at end of file diff --git a/tests/unit/prediction/ocr/test_location.py b/tests/unit/prediction/ocr/test_location.py index 6af91b1ab0..dec32a6504 100644 --- a/tests/unit/prediction/ocr/test_location.py +++ b/tests/unit/prediction/ocr/test_location.py @@ -1,5 +1,4 @@ import pytest -from flashtext import KeywordProcessor from robotoff import settings from robotoff.prediction.ocr.location import ( @@ -8,6 +7,7 @@ find_locations, load_cities_fr, ) +from robotoff.utils.text import KeywordProcessor module = "robotoff.prediction.ocr.location" diff --git a/tests/unit/utils/text/flashtext/__init__.py b/tests/unit/utils/text/flashtext/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/utils/text/flashtext/test_dictionary_loading.py b/tests/unit/utils/text/flashtext/test_dictionary_loading.py new file mode 100644 index 0000000000..78b165059e --- /dev/null +++ b/tests/unit/utils/text/flashtext/test_dictionary_loading.py @@ -0,0 +1,37 @@ +import logging +import unittest + +from robotoff.utils.text import KeywordProcessor + +logger = logging.getLogger(__name__) + + +class TestDictionaryLoad(unittest.TestCase): + def setUp(self): + logger.info("Starting...") + + def tearDown(self): + logger.info("Ending.") + + def test_dictionary_loading(self): + keyword_processor = KeywordProcessor() + keyword_dict = { + "java": ["java_2e", "java programing"], + "product management": [ + "product management techniques", + "product management", + ], + } + keyword_processor.add_keywords_from_dict(keyword_dict) + + sentence = "I know java_2e and product management techniques" + keywords_extracted = keyword_processor.extract_keywords(sentence) + self.assertEqual( + keywords_extracted, + ["java", "product management"], + "Failed file format one test", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/utils/text/flashtext/test_extract_fuzzy.py b/tests/unit/utils/text/flashtext/test_extract_fuzzy.py new file mode 100644 index 0000000000..c3a8fbf8ca --- /dev/null +++ b/tests/unit/utils/text/flashtext/test_extract_fuzzy.py @@ -0,0 +1,186 @@ +import logging +import unittest + +from robotoff.utils.text import KeywordProcessor + +logger = logging.getLogger(__name__) + + +class TestExtractFuzzy(unittest.TestCase): + def setUp(self): + logger.info("Starting...") + + def tearDown(self): + logger.info("Ending.") + + def test_extract_deletion(self): + """ + Fuzzy deletion + """ + keyword_proc = KeywordProcessor() + for keyword in (("skype", "messenger"),): + keyword_proc.add_keyword(*keyword) + + sentence = "hello, do you have skpe ?" 
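+        # "skpe" is one edit (a deleted 'y') away from "skype": with
+        # max_cost=1 the fuzzy match still yields the clean name "messenger",
+        # spanning indices 19-23 of the sentence above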
+ extracted_keywords = [("messenger", 19, 23)] + self.assertEqual( + keyword_proc.extract_keywords(sentence, span_info=True, max_cost=1), + extracted_keywords, + ) + + def test_extract_addition(self): + """ + Fuzzy addition + """ + keyword_proc = KeywordProcessor() + for keyword in (("colour here", "couleur ici"), ("and heere", "et ici")): + keyword_proc.add_keyword(*keyword) + + sentence = "color here blabla and here" + + extracted_keywords = [("couleur ici", 0, 10), ("et ici", 18, 26)] + self.assertListEqual( + keyword_proc.extract_keywords(sentence, span_info=True, max_cost=1), + extracted_keywords, + ) + + def test_correct_keyword_on_addition(self): + """ + Test for simple additions using the levensthein function + We ensure we end up on the right node in the trie when starting from the current node + """ + keyword_proc = KeywordProcessor() + for keyword in (("colour here", "couleur ici"), ("and heere", "et ici")): + keyword_proc.add_keyword(*keyword) + + current_dict = keyword_proc.keyword_trie_dict["c"]["o"]["l"]["o"] + closest_node, cost, depth = next( + keyword_proc.levensthein("r", max_cost=1, start_node=current_dict), + ({}, 0, 0), + ) + self.assertDictEqual(closest_node, current_dict["u"]["r"]) + self.assertEqual(cost, 1) + self.assertEqual(depth, 2) + + current_dict_continued = {"e": {"e": {"r": {"e": {"_keyword_": "et ici"}}}}} + closest_node, cost, depth = next( + keyword_proc.levensthein( + "ere", max_cost=1, start_node=current_dict_continued + ), + ({}, 0, 0), + ) + self.assertDictEqual(closest_node, current_dict_continued["e"]["e"]["r"]["e"]) + self.assertEqual(cost, 1) + self.assertEqual(depth, 4) + + def test_correct_keyword_on_deletion(self): + """ + Test for simple deletions using the levensthein function + We ensure we end up on the right node in the trie when starting from the current node + """ + keyword_proc = KeywordProcessor() + keyword_proc.add_keyword("skype") + current_dict = {"y": {"p": {"e": {"_keyword_": "skype"}}}} + + closest_node, cost, depth = next( + keyword_proc.levensthein("pe", max_cost=1, start_node=current_dict), + ({}, 0, 0), + ) + + self.assertDictEqual(closest_node, current_dict["y"]["p"]["e"]) + self.assertEqual(cost, 1) + self.assertEqual(depth, 3) + + def test_correct_keyword_on_substitution(self): + """ + Test for simple substitions using the levensthein function + We ensure we end up on the right node in the trie when starting from the current node + """ + keyword_proc = KeywordProcessor() + for keyword in (("skype", "messenger"),): + keyword_proc.add_keyword(*keyword) + + current_dict = keyword_proc.keyword_trie_dict["s"]["k"] + closest_node, cost, depth = next( + keyword_proc.levensthein("ope", max_cost=1, start_node=current_dict), + ({}, 0, 0), + ) + self.assertDictEqual(closest_node, current_dict["y"]["p"]["e"]) + self.assertEqual(cost, 1) + self.assertEqual(depth, 3) + + def test_extract_cost_spread_over_multiple_words(self): + """ + Here we try to extract a keyword made of different words + the current cost should be decreased by one when encountering 'maade' (1 insertion) + and again by one when encountering 'multple' (1 deletion) + """ + keyword_proc = KeywordProcessor() + keyword_made_of_multiple_words = "made of multiple words" + keyword_proc.add_keyword(keyword_made_of_multiple_words) + sentence = "this sentence contains a keyword maade of multple words" + + extracted_keywords = [(keyword_made_of_multiple_words, 33, 55)] + self.assertEqual( + keyword_proc.extract_keywords(sentence, span_info=True, max_cost=2), + 
extracted_keywords, + ) + + def test_extract_multiple_keywords(self): + keyword_proc = KeywordProcessor() + keyword_proc.add_keyword("first keyword") + keyword_proc.add_keyword("second keyword") + sentence = "starts with a first kyword then add a secand keyword" + extracted_keywords = [ + ("first keyword", 14, 26), + ("second keyword", 38, 52), + ] + self.assertEqual( + keyword_proc.extract_keywords(sentence, span_info=True, max_cost=1), + extracted_keywords, + ) + + def test_intermediate_match(self): + """ + In this test, we have an intermediate fuzzy match with a keyword (the shortest one) + We first check that we extract the longest keyword if the max_cost is big enough + Then we retry with a smaller max_cost, excluding the longest, and check that the shortest is extracted + """ + keyword_proc = KeywordProcessor() + keyword_proc.add_keyword("keyword") + keyword_proc.add_keyword("keyword with many words") + sentence = "This sentence contains a keywrd with many woords" + + shortest_keyword = ("keyword", 25, 31) + longest_keyword = ("keyword with many words", 25, 48) + + self.assertEqual( + keyword_proc.extract_keywords(sentence, span_info=True, max_cost=2), + [longest_keyword], + ) + self.assertEqual( + keyword_proc.extract_keywords(sentence, span_info=True, max_cost=1), + [shortest_keyword], + ) + + def test_intermediate_match_then_no_match(self): + """ + In this test, we have an intermediate fuzzy match with a keyword (the shortest one) + We check that we get only the shortest keyword when going further into fuzzy match is too + expansive to get the longest keyword. We also extract a classic match later in the string, + to check that the inner data structures all have a correct state + """ + keyword_proc = KeywordProcessor() + keyword_proc.add_keyword("keyword") + keyword_proc.add_keyword("keyword with many words") + sentence = "This sentence contains a keywrd with many items inside, a keyword at the end" + + keywords = [("keyword", 25, 31), ("keyword", 58, 65)] + self.assertEqual( + keyword_proc.extract_keywords(sentence, span_info=True, max_cost=2), + keywords, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/utils/text/flashtext/test_extractor.py b/tests/unit/utils/text/flashtext/test_extractor.py new file mode 100644 index 0000000000..f8645e45e9 --- /dev/null +++ b/tests/unit/utils/text/flashtext/test_extractor.py @@ -0,0 +1,83 @@ +import json +import logging +import unittest + +from robotoff import settings +from robotoff.utils.text import KeywordProcessor + +logger = logging.getLogger(__name__) + + +class TestKeywordExtractor(unittest.TestCase): + def setUp(self): + logger.info("Starting...") + with open( + settings.TEST_DATA_DIR / "flashtext/keyword_extractor_test_cases.json" + ) as f: + self.test_cases = json.load(f) + + def tearDown(self): + logger.info("Ending.") + + def test_extract_keywords(self): + """For each of the test case initialize a new KeywordProcessor. + Add the keywords the test case to KeywordProcessor. + Extract keywords and check if they match the expected result for the test case. 
+ + """ + for test_id, test_case in enumerate(self.test_cases): + keyword_processor = KeywordProcessor() + keyword_processor.add_keywords_from_dict(test_case["keyword_dict"]) + keywords_extracted = keyword_processor.extract_keywords( + test_case["sentence"] + ) + self.assertEqual( + keywords_extracted, + test_case["keywords"], + "keywords_extracted don't match the expected results for test case: {}".format( + test_id + ), + ) + + def test_extract_keywords_case_sensitive(self): + """For each of the test case initialize a new KeywordProcessor. + Add the keywords the test case to KeywordProcessor. + Extract keywords and check if they match the expected result for the test case. + + """ + for test_id, test_case in enumerate(self.test_cases): + keyword_processor = KeywordProcessor(case_sensitive=True) + keyword_processor.add_keywords_from_dict(test_case["keyword_dict"]) + keywords_extracted = keyword_processor.extract_keywords( + test_case["sentence"] + ) + self.assertEqual( + keywords_extracted, + test_case["keywords_case_sensitive"], + "keywords_extracted don't match the expected results for test case: {}".format( + test_id + ), + ) + + def test_extract_keywords_case_insensitive_with_string_length_change(self): + sentence = "Word İngredients LTD İmages nutriments i̇ngredients PROTEİNS" + keyword_processor = KeywordProcessor(case_sensitive=False) + keyword_processor.add_keyword("İngredients", "ingredients") + keyword_processor.add_keyword("nutriments", "nutriments") + keyword_processor.add_keyword("PROTEİNS", "proteins") + extracted_keywords = keyword_processor.extract_keywords( + sentence, span_info=True + ) + self.assertEqual( + extracted_keywords, + [ + ("ingredients", 5, 16), + ("nutriments", 28, 38), + ("ingredients", 39, 51), + ("proteins", 52, 60), + ], + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/utils/text/flashtext/test_file_load.py b/tests/unit/utils/text/flashtext/test_file_load.py new file mode 100644 index 0000000000..6544a1a763 --- /dev/null +++ b/tests/unit/utils/text/flashtext/test_file_load.py @@ -0,0 +1,45 @@ +import logging +import unittest + +from robotoff import settings +from robotoff.utils.text import KeywordProcessor + +logger = logging.getLogger(__name__) + + +class TestFileLoad(unittest.TestCase): + def setUp(self): + logger.info("Starting...") + + def tearDown(self): + logger.info("Ending.") + + def test_file_format_one(self): + keyword_processor = KeywordProcessor() + keyword_processor.add_keyword_from_file( + settings.TEST_DATA_DIR / "flashtext/keywords_format_one.txt" + ) + sentence = "I know java_2e and product management techniques" + keywords_extracted = keyword_processor.extract_keywords(sentence) + self.assertEqual( + keywords_extracted, + ["java", "product management"], + "Failed file format one test", + ) + + def test_file_format_two(self): + keyword_processor = KeywordProcessor() + keyword_processor.add_keyword_from_file( + settings.TEST_DATA_DIR / "flashtext/keywords_format_two.txt" + ) + sentence = "I know java and product management" + keywords_extracted = keyword_processor.extract_keywords(sentence) + self.assertEqual( + keywords_extracted, + ["java", "product management"], + "Failed file format one test", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/utils/text/flashtext/test_kp_exceptions.py b/tests/unit/utils/text/flashtext/test_kp_exceptions.py new file mode 100644 index 0000000000..48a994c63a --- /dev/null +++ b/tests/unit/utils/text/flashtext/test_kp_exceptions.py @@ -0,0 +1,66 @@ 
+import logging +import unittest + +import pytest + +from robotoff.utils.text import KeywordProcessor + +logger = logging.getLogger(__name__) + + +class TestKPExceptions(unittest.TestCase): + def setUp(self): + logger.info("Starting...") + + def tearDown(self): + logger.info("Ending.") + + def test_iterator_NotImplementedError(self): + keyword_processor = KeywordProcessor() + keyword_processor.add_keyword("j2ee", "Java") + keyword_processor.add_keyword("colour", "color") + keyword_processor.get_all_keywords() + with pytest.raises(NotImplementedError): + for _ in keyword_processor: + pass + + def test_add_keyword_file_missing(self): + keyword_processor = KeywordProcessor() + with pytest.raises(IOError): + keyword_processor.add_keyword_from_file("missing_file") + + def test_add_keyword_from_list(self): + keyword_processor = KeywordProcessor() + keyword_list = "java" + with pytest.raises(AttributeError): + keyword_processor.add_keywords_from_list(keyword_list) + + def test_add_keyword_from_dictionary(self): + keyword_processor = KeywordProcessor() + keyword_dict = {"java": "java_2e", "product management": "product manager"} + with pytest.raises(AttributeError): + keyword_processor.add_keywords_from_dict(keyword_dict) + + def test_remove_keyword_from_list(self): + keyword_processor = KeywordProcessor() + keyword_list = "java" + with pytest.raises(AttributeError): + keyword_processor.remove_keywords_from_list(keyword_list) + + def test_remove_keyword_from_dictionary(self): + keyword_processor = KeywordProcessor() + keyword_dict = {"java": "java_2e", "product management": "product manager"} + with pytest.raises(AttributeError): + keyword_processor.remove_keywords_from_dict(keyword_dict) + + def test_empty_string(self): + keyword_processor = KeywordProcessor() + self.assertEqual( + keyword_processor.extract_keywords(""), + [], + "new_sentence don't match the expected result", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/utils/text/flashtext/test_kp_extract_span.py b/tests/unit/utils/text/flashtext/test_kp_extract_span.py new file mode 100644 index 0000000000..b7d3e5572b --- /dev/null +++ b/tests/unit/utils/text/flashtext/test_kp_extract_span.py @@ -0,0 +1,70 @@ +import json +import logging +import unittest + +from robotoff import settings +from robotoff.utils.text import KeywordProcessor + +logger = logging.getLogger(__name__) + + +class TestKPExtractorSpan(unittest.TestCase): + def setUp(self): + logger.info("Starting...") + with open( + settings.TEST_DATA_DIR / "flashtext/keyword_extractor_test_cases.json" + ) as f: + self.test_cases = json.load(f) + + def tearDown(self): + logger.info("Ending.") + + def test_extract_keywords(self): + """For each of the test case initialize a new KeywordProcessor. + Add the keywords the test case to KeywordProcessor. + Extract keywords and check if they match the expected result for the test case. 
+
+        """
+        for test_id, test_case in enumerate(self.test_cases):
+            keyword_processor = KeywordProcessor()
+            for key in test_case["keyword_dict"]:
+                keyword_processor.add_keywords_from_list(test_case["keyword_dict"][key])
+            keywords_extracted = keyword_processor.extract_keywords(
+                test_case["sentence"], span_info=True
+            )
+            for kwd in keywords_extracted:
+                # the returned keyword, lowercased, should match the span from the sentence
+                self.assertEqual(
+                    kwd[0].lower(),
+                    test_case["sentence"].lower()[kwd[1] : kwd[2]],
+                    "keyword spans don't match the expected results for test case: {}".format(
+                        test_id
+                    ),
+                )
+
+    def test_extract_keywords_case_sensitive(self):
+        """For each test case, initialize a new KeywordProcessor.
+        Add the keywords from the test case to the KeywordProcessor.
+        Extract keywords and check if they match the expected result for the test case.
+
+        """
+        for test_id, test_case in enumerate(self.test_cases):
+            keyword_processor = KeywordProcessor(case_sensitive=True)
+            for key in test_case["keyword_dict"]:
+                keyword_processor.add_keywords_from_list(test_case["keyword_dict"][key])
+            keywords_extracted = keyword_processor.extract_keywords(
+                test_case["sentence"], span_info=True
+            )
+            for kwd in keywords_extracted:
+                # the returned keyword should match the span from the sentence
+                self.assertEqual(
+                    kwd[0],
+                    test_case["sentence"][kwd[1] : kwd[2]],
+                    "keyword spans don't match the expected results for test case: {}".format(
+                        test_id
+                    ),
+                )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/unit/utils/text/flashtext/test_kp_get_all_keywords.py b/tests/unit/utils/text/flashtext/test_kp_get_all_keywords.py
new file mode 100644
index 0000000000..ea8eb78599
--- /dev/null
+++ b/tests/unit/utils/text/flashtext/test_kp_get_all_keywords.py
@@ -0,0 +1,29 @@
+import logging
+import unittest
+
+from robotoff.utils.text import KeywordProcessor
+
+logger = logging.getLogger(__name__)
+
+
+class TestKPGetAllKeywords(unittest.TestCase):
+    def setUp(self):
+        logger.info("Starting...")
+
+    def tearDown(self):
+        logger.info("Ending.")
+
+    def test_get_all_keywords(self):
+        keyword_processor = KeywordProcessor()
+        keyword_processor.add_keyword("j2ee", "Java")
+        keyword_processor.add_keyword("colour", "color")
+        keyword_processor.get_all_keywords()
+        self.assertEqual(
+            keyword_processor.get_all_keywords(),
+            {"colour": "color", "j2ee": "Java"},
+            "get_all_keywords didn't match expected results.",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/unit/utils/text/flashtext/test_kp_len.py b/tests/unit/utils/text/flashtext/test_kp_len.py
new file mode 100644
index 0000000000..b4b8939405
--- /dev/null
+++ b/tests/unit/utils/text/flashtext/test_kp_len.py
@@ -0,0 +1,62 @@
+import json
+import logging
+import sys
+import unittest
+from collections import defaultdict
+
+from robotoff import settings
+from robotoff.utils.text import KeywordProcessor
+
+logger = logging.getLogger(__name__)
+logger.level = logging.DEBUG
+stream_handler = logging.StreamHandler(sys.stdout)
+logger.addHandler(stream_handler)
+
+
+class TestKPLen(unittest.TestCase):
+    def setUp(self):
+        logger.info("Starting...")
+        with open(
+            settings.TEST_DATA_DIR / "flashtext/keyword_remover_test_cases.json"
+        ) as f:
+            self.test_cases = json.load(f)
+
+    def tearDown(self):
+        logger.info("Ending.")
+
+    def test_remove_keywords_dictionary_len(self):
+        """For each test case, initialize a new KeywordProcessor.
+        Add the keywords from the test case to the KeywordProcessor.
+ Remove the keywords in remove_keyword_dict + Extract keywords and check if they match the expected result for the test case. + """ + for test_id, test_case in enumerate(self.test_cases): + keyword_processor = KeywordProcessor() + keyword_processor.add_keywords_from_dict(test_case["keyword_dict"]) + keyword_processor.remove_keywords_from_dict( + test_case["remove_keyword_dict"] + ) + + kp_len = len(keyword_processor) + + new_dictionary = defaultdict(list) + for key, values in test_case["keyword_dict"].items(): + for value in values: + if not ( + key in test_case["remove_keyword_dict"] + and value in test_case["remove_keyword_dict"][key] + ): + new_dictionary[key].append(value) + + keyword_processor_two = KeywordProcessor() + keyword_processor_two.add_keywords_from_dict(new_dictionary) + kp_len_two = len(keyword_processor_two) + self.assertEqual( + kp_len, + kp_len_two, + "keyword processor length doesn't match for Text ID {}".format(test_id), + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/utils/text/flashtext/test_kp_next_word.py b/tests/unit/utils/text/flashtext/test_kp_next_word.py new file mode 100644 index 0000000000..5a1d1ce648 --- /dev/null +++ b/tests/unit/utils/text/flashtext/test_kp_next_word.py @@ -0,0 +1,27 @@ +import logging +import unittest + +from robotoff.utils.text import KeywordProcessor + +logger = logging.getLogger(__name__) + + +class TestKPNextWord(unittest.TestCase): + def setUp(self): + logger.info("Starting...") + + def tearDown(self): + logger.info("Ending.") + + def test_next_word(self): + """ + Test for next word extraction + """ + keyword_proc = KeywordProcessor() + self.assertEqual(keyword_proc.get_next_word(""), "") + self.assertEqual(keyword_proc.get_next_word("random sentence"), "random") + self.assertEqual(keyword_proc.get_next_word(" random sentence"), "") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/utils/text/flashtext/test_kp_term_in_kp.py b/tests/unit/utils/text/flashtext/test_kp_term_in_kp.py new file mode 100644 index 0000000000..49d03acbfb --- /dev/null +++ b/tests/unit/utils/text/flashtext/test_kp_term_in_kp.py @@ -0,0 +1,72 @@ +import logging +import unittest + +from robotoff.utils.text import KeywordProcessor + +logger = logging.getLogger(__name__) + + +class TestKPDictionaryLikeFeatures(unittest.TestCase): + def setUp(self): + logger.info("Starting...") + + def tearDown(self): + logger.info("Ending.") + + def test_term_in_dictionary(self): + keyword_processor = KeywordProcessor() + keyword_processor.add_keyword("j2ee", "Java") + keyword_processor.add_keyword("colour", "color") + keyword_processor.get_keyword("j2ee") + self.assertEqual( + keyword_processor.get_keyword("j2ee"), + "Java", + "get_keyword didn't return expected Keyword", + ) + self.assertEqual( + keyword_processor["colour"], + "color", + "get_keyword didn't return expected Keyword", + ) + self.assertEqual( + keyword_processor["Test"], + None, + "get_keyword didn't return expected Keyword", + ) + self.assertTrue( + "colour" in keyword_processor, "get_keyword didn't return expected Keyword" + ) + self.assertFalse( + "Test" in keyword_processor, "get_keyword didn't return expected Keyword" + ) + + def test_term_in_dictionary_case_sensitive(self): + keyword_processor = KeywordProcessor(case_sensitive=True) + keyword_processor.add_keyword("j2ee", "Java") + keyword_processor.add_keyword("colour", "color") + keyword_processor.get_keyword("j2ee") + self.assertEqual( + keyword_processor.get_keyword("j2ee"), + "Java", + "get_keyword 
didn't return expected Keyword", + ) + self.assertEqual( + keyword_processor["colour"], + "color", + "get_keyword didn't return expected Keyword", + ) + self.assertEqual( + keyword_processor["J2ee"], + None, + "get_keyword didn't return expected Keyword", + ) + self.assertTrue( + "colour" in keyword_processor, "get_keyword didn't return expected Keyword" + ) + self.assertFalse( + "Colour" in keyword_processor, "get_keyword didn't return expected Keyword" + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/utils/text/flashtext/test_loading_keyword_list.py b/tests/unit/utils/text/flashtext/test_loading_keyword_list.py new file mode 100644 index 0000000000..ea226d3051 --- /dev/null +++ b/tests/unit/utils/text/flashtext/test_loading_keyword_list.py @@ -0,0 +1,30 @@ +import logging +import unittest + +from robotoff.utils.text import KeywordProcessor + +logger = logging.getLogger(__name__) + + +class TestListLoad(unittest.TestCase): + def setUp(self): + logger.info("Starting...") + + def tearDown(self): + logger.info("Ending.") + + def test_list_loading(self): + keyword_processor = KeywordProcessor() + keyword_list = ["java", "product management"] + keyword_processor.add_keywords_from_list(keyword_list) + sentence = "I know java and product management" + keywords_extracted = keyword_processor.extract_keywords(sentence) + self.assertEqual( + keywords_extracted, + ["java", "product management"], + "Failed file format one test", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/utils/text/flashtext/test_remove_keywords.py b/tests/unit/utils/text/flashtext/test_remove_keywords.py new file mode 100644 index 0000000000..ba6c1623d2 --- /dev/null +++ b/tests/unit/utils/text/flashtext/test_remove_keywords.py @@ -0,0 +1,105 @@ +import json +import logging +import unittest +from collections import defaultdict + +from robotoff import settings +from robotoff.utils.text import KeywordProcessor + +logger = logging.getLogger(__name__) + + +class TestKeywordRemover(unittest.TestCase): + def setUp(self): + logger.info("Starting...") + with open( + settings.TEST_DATA_DIR / "flashtext/keyword_remover_test_cases.json" + ) as f: + self.test_cases = json.load(f) + + def tearDown(self): + logger.info("Ending.") + + def test_remove_keywords(self): + """For each of the test case initialize a new KeywordProcessor. + Add the keywords the test case to KeywordProcessor. + Remove the keywords in remove_keyword_dict + Extract keywords and check if they match the expected result for the test case. + """ + for test_id, test_case in enumerate(self.test_cases): + keyword_processor = KeywordProcessor() + keyword_processor.add_keywords_from_dict(test_case["keyword_dict"]) + keyword_processor.remove_keywords_from_dict( + test_case["remove_keyword_dict"] + ) + keywords_extracted = keyword_processor.extract_keywords( + test_case["sentence"] + ) + self.assertEqual( + keywords_extracted, + test_case["keywords"], + "keywords_extracted don't match the expected results for test case: {}".format( + test_id + ), + ) + + def test_remove_keywords_using_list(self): + """For each of the test case initialize a new KeywordProcessor. + Add the keywords the test case to KeywordProcessor. + Remove the keywords in remove_keyword_dict + Extract keywords and check if they match the expected result for the test case. 
+ """ + for test_id, test_case in enumerate(self.test_cases): + keyword_processor = KeywordProcessor() + keyword_processor.add_keywords_from_dict(test_case["keyword_dict"]) + for key in test_case["remove_keyword_dict"]: + keyword_processor.remove_keywords_from_list( + test_case["remove_keyword_dict"][key] + ) + keywords_extracted = keyword_processor.extract_keywords( + test_case["sentence"] + ) + self.assertEqual( + keywords_extracted, + test_case["keywords"], + "keywords_extracted don't match the expected results for test case: {}".format( + test_id + ), + ) + + def test_remove_keywords_dictionary_compare(self): + """For each of the test case initialize a new KeywordProcessor. + Add the keywords the test case to KeywordProcessor. + Remove the keywords in remove_keyword_dict + Extract keywords and check if they match the expected result for the test case. + """ + for test_id, test_case in enumerate(self.test_cases): + keyword_processor = KeywordProcessor() + keyword_processor.add_keywords_from_dict(test_case["keyword_dict"]) + keyword_processor.remove_keywords_from_dict( + test_case["remove_keyword_dict"] + ) + keyword_trie_dict = keyword_processor.keyword_trie_dict + + new_dictionary = defaultdict(list) + for key, values in test_case["keyword_dict"].items(): + for value in values: + if not ( + key in test_case["remove_keyword_dict"] + and value in test_case["remove_keyword_dict"][key] + ): + new_dictionary[key].append(value) + + keyword_processor_two = KeywordProcessor() + keyword_processor_two.add_keywords_from_dict(new_dictionary) + keyword_trie_dict_two = keyword_processor_two.keyword_trie_dict + self.assertTrue( + keyword_trie_dict == keyword_trie_dict_two, + "keywords_extracted don't match the expected results for test case: {}".format( + test_id + ), + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/utils/test_text.py b/tests/unit/utils/text/test_text.py similarity index 100% rename from tests/unit/utils/test_text.py rename to tests/unit/utils/text/test_text.py