
Support asyncio with AsyncInferenceClient #1524

Merged 47 commits into main from inference-async-client on Jul 4, 2023
Commits (47)
7fcb534
prepare for async
Wauplin Jun 21, 2023
15003b2
more stuff
Wauplin Jun 21, 2023
6b1fc86
comments
Wauplin Jun 21, 2023
497fca4
add aiohttp to deps
Wauplin Jun 21, 2023
19f2ae6
prepare script to generate AsyncInferenceClient
Wauplin Jun 21, 2023
cfee5aa
some progress
Wauplin Jun 21, 2023
8daba37
more progress
Wauplin Jun 21, 2023
098893a
make methods async
Wauplin Jun 21, 2023
4b420ca
await post calls
Wauplin Jun 21, 2023
37ce4b4
remove examples in AsyncClient
Wauplin Jun 21, 2023
c60f79e
working generating script?
Wauplin Jun 21, 2023
34bb2e3
Merge branch 'main' into inference-async-client
Wauplin Jun 27, 2023
a3d35cd
make async
Wauplin Jun 27, 2023
4d630b6
explicit regexes
Wauplin Jun 27, 2023
ba064d4
working AsyncInferenceClient
Wauplin Jun 27, 2023
f421fdf
moving further
Wauplin Jun 27, 2023
020cf17
closer to a solution
Wauplin Jun 27, 2023
514e656
more explicit?
Wauplin Jun 27, 2023
130554d
fix typing
Wauplin Jun 27, 2023
9736aaf
fix tests
Wauplin Jun 27, 2023
0556631
fix tests for real
Wauplin Jun 27, 2023
1dfb0db
Examples in async client
Wauplin Jun 28, 2023
6ddbcd0
Merge branch 'main' into inference-async-client
Wauplin Jun 28, 2023
520bd91
adapt async text generation code
Wauplin Jun 28, 2023
be7a50f
better handling of errors
Wauplin Jun 28, 2023
19a5b20
first async tests
Wauplin Jun 28, 2023
deae6ae
fix tests for python3.7
Wauplin Jun 28, 2023
e1ccda7
add sentence_similarity tests
Wauplin Jun 28, 2023
cffb9e5
reference
Wauplin Jun 28, 2023
8dcf411
docs
Wauplin Jun 28, 2023
5d4ce6c
fix setup.py
Wauplin Jun 28, 2023
803f0b5
Merge branch 'main' into inference-async-client
Wauplin Jun 29, 2023
e45e01a
docs
Wauplin Jun 29, 2023
1fff6bb
fix styling
Wauplin Jun 29, 2023
4ac38de
fix tests
Wauplin Jun 29, 2023
866da17
Merge branch 'main' into inference-async-client
Wauplin Jun 29, 2023
0fec22e
Merge branch 'main' into inference-async-client
Wauplin Jun 29, 2023
04a033d
fix zero-shot task
Wauplin Jun 29, 2023
9386c8a
Merge branch 'main' into inference-async-client
Wauplin Jun 29, 2023
b1ee580
fix zero class
Wauplin Jun 29, 2023
ec1104d
Merge branch 'main' into inference-async-client
Wauplin Jul 4, 2023
750759e
Move _async_client.py to a _generated/ folder
Wauplin Jul 4, 2023
42feafc
add copyright to tests
Wauplin Jul 4, 2023
c321ec0
fix imports
Wauplin Jul 4, 2023
3730075
fix import
Wauplin Jul 4, 2023
153e2d9
fix async text_generation return type when stream=True
Wauplin Jul 4, 2023
adb970d
test async methods signatures
Wauplin Jul 4, 2023
1 change: 1 addition & 0 deletions .github/workflows/python-quality.yml
@@ -31,6 +31,7 @@ jobs:
- run: ruff tests src contrib
- run: python utils/check_contrib_list.py
- run: python utils/check_static_imports.py
- run: python utils/generate_async_inference_client.py

# Run type checking at least on huggingface_hub root file to check all modules
# that can be lazy-loaded actually exist.
2 changes: 2 additions & 0 deletions Makefile
@@ -10,12 +10,14 @@ quality:
mypy src
python utils/check_contrib_list.py
python utils/check_static_imports.py
python utils/generate_async_inference_client.py

style:
black $(check_dirs)
ruff $(check_dirs) --fix
python utils/check_contrib_list.py --update
python utils/check_static_imports.py --update
python utils/generate_async_inference_client.py --update

repocard:
python utils/push_repocard_examples.py
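The `quality` and `style` targets above keep `AsyncInferenceClient` consistent by checking and regenerating it from the sync client. The real `utils/generate_async_inference_client.py` is more involved; the toy sketch below only illustrates the general regex-rewriting idea behind such a generator (the function, patterns, and sample source here are illustrative, not taken from the actual script):

```python
import re

def to_async(source: str) -> str:
    """Illustrative sync->async source rewriter (not the real generator)."""
    # Rename the class so both variants can coexist in the package.
    source = source.replace("class InferenceClient", "class AsyncInferenceClient")
    # Turn plain methods into coroutines, leaving __init__ untouched.
    source = re.sub(r"\bdef (?!__init__\b)(\w+\()", r"async def \1", source)
    # Await the internal HTTP helper wherever it is called.
    source = source.replace("self.post(", "await self.post(")
    return source

sync_src = (
    "class InferenceClient:\n"
    "    def summarize(self, text):\n"
    "        return self.post(json={'inputs': text})\n"
)
print(to_async(sync_src))
```

Running the generator in CI (as the workflow change above does) then reduces to regenerating the file and failing if the output differs from what is checked in.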
30 changes: 30 additions & 0 deletions docs/source/guides/inference.md
@@ -175,6 +175,36 @@ image. [`InferenceClient.post`] is also useful to handle tasks that are not yet
b'...'
```

## Async client

An async version of the client is also provided, based on `asyncio` and `aiohttp`. You can either install `aiohttp`
directly or use the `[inference]` extra:

```sh
pip install --upgrade huggingface_hub[inference]
# or
# pip install aiohttp
```

After installation, all async API endpoints are available via [`AsyncInferenceClient`]. Its initialization and APIs are
strictly the same as in the sync-only version.

```py
# Code must be run in an asyncio concurrent context.
# $ python -m asyncio
>>> from huggingface_hub import AsyncInferenceClient
>>> client = AsyncInferenceClient()

>>> image = await client.text_to_image("An astronaut riding a horse on the moon.")
>>> image.save("astronaut.png")

>>> async for token in await client.text_generation("The Huggingface Hub is", stream=True):
... print(token, end="")
a platform for sharing and discussing ML-related content.
```

For more information about the `asyncio` module, please refer to the [official documentation](https://docs.python.org/3/library/asyncio.html).
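A practical benefit of the async client is issuing several inference requests concurrently instead of serially. A minimal, self-contained sketch of the pattern, using a stub coroutine in place of real network calls (`fake_text_generation` is an illustrative stand-in — with the real client you would `await client.text_generation(prompt)` instead):

```python
import asyncio

async def fake_text_generation(prompt: str) -> str:
    # Stand-in for AsyncInferenceClient.text_generation: simulate latency,
    # then return a dummy completion.
    await asyncio.sleep(0.01)
    return prompt + " ..."

async def main():
    prompts = ["The Huggingface Hub is", "Async clients are"]
    # asyncio.gather schedules both "requests" concurrently, so total wall
    # time is roughly one request's latency, not the sum of both.
    return await asyncio.gather(*(fake_text_generation(p) for p in prompts))

results = asyncio.run(main())
print(results)
```

Results come back in the same order as the input prompts, which makes it easy to zip them back together with their inputs.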

## Advanced tips

In the above section, we saw the main aspects of [`InferenceClient`]. Let's dive into some more advanced tips.
13 changes: 13 additions & 0 deletions docs/source/package_reference/inference_client.md
@@ -20,6 +20,19 @@ for more information on how to use it.

[[autodoc]] InferenceClient

## Async Inference Client

An async version of the client is also provided, based on `asyncio` and `aiohttp`.
To use it, you can either install `aiohttp` directly or use the `[inference]` extra:

```sh
pip install --upgrade huggingface_hub[inference]
# or
# pip install aiohttp
```

[[autodoc]] AsyncInferenceClient

### InferenceTimeoutError

[[autodoc]] InferenceTimeoutError
39 changes: 24 additions & 15 deletions setup.py
@@ -29,6 +29,11 @@ def get_version() -> str:
# Note: installs `prompt-toolkit` in the background
]

extras["inference"] = [
"aiohttp", # for AsyncInferenceClient
"pydantic", # for text-generation-inference
]

extras["torch"] = [
"torch",
]
@@ -42,21 +47,25 @@ def get_version() -> str:
extras["tensorflow"] = ["tensorflow", "pydot", "graphviz"]


extras["testing"] = extras["cli"] + [
"jedi",
"Jinja2",
"pytest",
"pytest-cov",
"pytest-env",
"pytest-xdist",
"pytest-vcr", # to mock Inference
"urllib3<2.0", # VCR.py broken with urllib3 2.0 (see https://urllib3.readthedocs.io/en/stable/v2-migration-guide.html)
"soundfile",
"Pillow",
"gradio", # to test webhooks
"numpy", # for embeddings
"pydantic", # for text-generation-inference
]
extras["testing"] = (
extras["cli"]
+ extras["inference"]
+ [
"jedi",
"Jinja2",
"pytest",
"pytest-cov",
"pytest-env",
"pytest-xdist",
"pytest-vcr", # to mock Inference
"pytest-asyncio", # for AsyncInferenceClient
"urllib3<2.0", # VCR.py broken with urllib3 2.0 (see https://urllib3.readthedocs.io/en/stable/v2-migration-guide.html)
"soundfile",
"Pillow",
"gradio", # to test webhooks
"numpy", # for embeddings
]
)

# Typing extra dependencies list is duplicated in `.pre-commit-config.yaml`
# Please make sure to update the list there when adding a new typing dependency.
4 changes: 4 additions & 0 deletions src/huggingface_hub/__init__.py
@@ -207,6 +207,9 @@
"InferenceClient",
"InferenceTimeoutError",
],
"inference._generated._async_client": [
"AsyncInferenceClient",
],
"inference_api": [
"InferenceApi",
],
@@ -503,6 +506,7 @@ def __dir__():
InferenceClient, # noqa: F401
InferenceTimeoutError, # noqa: F401
)
from .inference._generated._async_client import AsyncInferenceClient # noqa: F401
from .inference_api import InferenceApi # noqa: F401
from .keras_mixin import (
KerasModelHubMixin, # noqa: F401