Add AsyncioModelClient

piotrm-nvidia committed Jul 27, 2023
1 parent 330d748 commit 1890816
Showing 17 changed files with 2,247 additions and 389 deletions.
112 changes: 110 additions & 2 deletions docs/clients.md
@@ -103,7 +103,7 @@ with FuturesModelClient("localhost:8000", "TextGen") as client:
# Wait for all the futures to complete and get the results
output_data_list = [output_data_future.result() for output_data_future in output_data_futures]

# Print the list of result dictionaries
# Print tokens
print(output_data_list)

# Detokenize the output texts using the tokenizer and print them
@@ -161,6 +161,114 @@ for output_data in output_data_list:
print(f"The image is classified as {class_name}.")
```

## AsyncioModelClient

AsyncioModelClient is an asynchronous client that performs inference requests with the asyncio library. You can use it to communicate with the deployed model over the HTTP or gRPC protocol, and you specify the protocol when creating the AsyncioModelClient object.
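
The protocol is selected through the URL scheme. A minimal sketch (the `grpc://` scheme and the default ports `8000` for HTTP and `8001` for gRPC are assumptions based on Triton's conventions):

<!--pytest.mark.skip-->
```python
from pytriton.client import AsyncioModelClient

# A plain host:port address defaults to HTTP (Triton's default HTTP port is 8000)
http_client = AsyncioModelClient("localhost:8000", "Linear")

# A grpc:// scheme selects the gRPC protocol (Triton's default gRPC port is 8001)
grpc_client = AsyncioModelClient("grpc://localhost:8001", "Linear")
```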

For example, you can use AsyncioModelClient to send requests to a PyTorch model that performs linear regression:

<!--pytest.mark.skip-->
```python
import asyncio

import torch
from pytriton.client import AsyncioModelClient


async def main():
    # Create some input data as a numpy array
    input1_data = torch.randn(2).cpu().detach().numpy()

    # Create an AsyncioModelClient object with the server address and model name
    async with AsyncioModelClient("localhost:8000", "Linear") as client:
        # Call the infer_sample method with the input data
        result_dict = await client.infer_sample(input1_data)

    # Print the result dictionary
    print(result_dict)


asyncio.run(main())
```
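
Because the client is asynchronous, several requests can be awaited concurrently, which is its main advantage over the blocking clients. A sketch using `asyncio.gather` (the model name and input shape follow the example above):

<!--pytest.mark.skip-->
```python
import asyncio

import torch
from pytriton.client import AsyncioModelClient


async def main():
    # Prepare a few independent input samples
    samples = [torch.randn(2).cpu().detach().numpy() for _ in range(4)]

    async with AsyncioModelClient("localhost:8000", "Linear") as client:
        # Schedule all inference requests and await them concurrently
        results = await asyncio.gather(*(client.infer_sample(data) for data in samples))

    for result_dict in results:
        print(result_dict)


asyncio.run(main())
```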

You can also use FastAPI to create a web application that exposes the results of inference at an HTTP endpoint. FastAPI is a modern, fast web framework for building APIs with Python 3.6+ based on standard Python type hints.

To use FastAPI, you need to install it with:

```bash
pip install fastapi
```

You also need an ASGI server; for production, use one such as Uvicorn or Hypercorn.

To install Uvicorn, run:

```bash
pip install "uvicorn[standard]"
```

By default, `uvicorn` serves its web application on port `8000`, which is also the Triton server's default port for the HTTP protocol. You can change the uvicorn port with the `--port` option. PyTriton also supports custom port configuration for the Triton server: the `TritonConfig` class contains the port parameters, and you can pass it to `Triton` during initialization:

<!--pytest.mark.skip-->
```python
from pytriton.triton import Triton, TritonConfig

config = TritonConfig(http_port=8015)
triton_server = Triton(config=config)
```

You can use this `triton_server` object to bind your inference model and serve the Triton Inference Server HTTP endpoint on port `8015`.
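
Putting this together, a minimal sketch of serving a linear model on port `8015` could look as follows (the inference function, the tensor names `INPUT_1`/`OUTPUT_1`, and the shapes are illustrative assumptions, not part of the original example):

<!--pytest.mark.skip-->
```python
import numpy as np
import torch
from pytriton.decorators import batch
from pytriton.model_config import ModelConfig, Tensor
from pytriton.triton import Triton, TritonConfig

# A toy linear model mapping 2 input features to 3 outputs
model = torch.nn.Linear(2, 3).eval()


@batch
def infer_fn(INPUT_1):
    # INPUT_1 arrives as a batched numpy array; run the model without gradients
    with torch.no_grad():
        output = model(torch.from_numpy(INPUT_1).float())
    return {"OUTPUT_1": output.numpy()}


config = TritonConfig(http_port=8015)
with Triton(config=config) as triton:
    triton.bind(
        model_name="Linear",
        infer_func=infer_fn,
        inputs=[Tensor(name="INPUT_1", dtype=np.float32, shape=(2,))],
        outputs=[Tensor(name="OUTPUT_1", dtype=np.float32, shape=(3,))],
        config=ModelConfig(max_batch_size=16),
    )
    triton.serve()
```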


Then you can create a FastAPI app that uses the AsyncioModelClient to perform inference and return the results as JSON:

<!--pytest.mark.skip-->
```python
from fastapi import FastAPI
import torch
from pytriton.client import AsyncioModelClient

app = FastAPI()


@app.get("/predict")
async def predict():
    # Create some input data as a numpy array
    input1_data = torch.randn(2).cpu().detach().numpy()

    # Create an AsyncioModelClient object with the server address and model name
    async with AsyncioModelClient("localhost:8000", "Linear") as client:
        # Call the infer_sample method with the input data
        result_dict = await client.infer_sample(input1_data)

    # Take the single output tensor and convert it to a list so FastAPI can
    # serialize it to JSON (NumPy arrays are not JSON-serializable)
    output = next(iter(result_dict.values()))
    return output.tolist()
```

Save this file as `main.py`.

To run the app, use the command:

<!--pytest.mark.skip-->
```bash
uvicorn main:app --reload --port 8015
```

You can then access the endpoint at `http://127.0.0.1:8015/predict` and see the JSON response.

You can also check the interactive API documentation at `http://127.0.0.1:8015/docs`.

You can test your server using curl:

<!--pytest.mark.skip-->
```bash
curl -X 'GET' \
'http://127.0.0.1:8015/predict' \
-H 'accept: application/json'
```

The command prints three random numbers returned by the model, for example:
<!--pytest.mark.skip-->
```json
[-0.2608422636985779,-0.6435106992721558,-0.3492531180381775]
```
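
Opening a new AsyncioModelClient for every request adds connection overhead. A sketch that keeps one client open for the application's lifetime using FastAPI's lifespan hook (this assumes a FastAPI version with lifespan support; the model name and ports follow the examples above):

<!--pytest.mark.skip-->
```python
from contextlib import asynccontextmanager

import torch
from fastapi import FastAPI
from pytriton.client import AsyncioModelClient


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Enter the client's async context once at startup and hold it open
    # until the application shuts down
    async with AsyncioModelClient("localhost:8000", "Linear") as client:
        app.state.client = client
        yield


app = FastAPI(lifespan=lifespan)


@app.get("/predict")
async def predict():
    input1_data = torch.randn(2).cpu().detach().numpy()
    result_dict = await app.state.client.infer_sample(input1_data)
    # Convert the single output tensor to a list for JSON serialization
    return next(iter(result_dict.values())).tolist()
```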

For more information about FastAPI and Uvicorn, check out these links:

- [FastAPI documentation](https://fastapi.tiangolo.com/)
- [Uvicorn documentation](https://www.uvicorn.org/)


## Client timeouts

When creating a [ModelClient][pytriton.client.client.ModelClient] or [FuturesModelClient][pytriton.client.client.FuturesModelClient] object, you can specify the timeout for waiting until the server and model are ready using the `init_timeout_s` parameter. By default, the timeout is set to 5 minutes (300 seconds).
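
For example, a sketch that doubles this readiness timeout (the client and model names are illustrative):

<!--pytest.mark.skip-->
```python
from pytriton.client import ModelClient

# Wait up to 600 seconds for the server and the MyModel model to become ready
with ModelClient("localhost", "MyModel", init_timeout_s=600) as client:
    ...
```
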
@@ -231,4 +339,4 @@ with FuturesModelClient("localhost", "MyModel", inference_timeout_s=240) as clie

!!! info "Server side timeout not implemented"

    Currently, there is no support for server-side timeout. The server will continue to process the request even if the client timeout is reached.
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -62,6 +62,8 @@ test = [
"tqdm >= 4.64.1",
"psutil ~= 5.1",
"py-spy ~= 0.3",
"alt-pytest-asyncio ~= 0.7.1", # timeout for asyncio tests
"pytest-timeout ~= 2.1.0", # timeouts for non-asyncio tests
]
doc = [
"GitPython >= 3.1.30",
@@ -77,11 +79,10 @@ dev = [
"build >= 0.8",
"ipython >= 7.16",
"isort >= 5.10",
"pdbpp >= 0.10",
"pudb >= 2022.1.3",
"pip >= 21.3", # to support editable installation
"pre-commit >= 2.20.0",
"twine >= 4.0"
"twine >= 4.0",
]

[build-system]
1 change: 1 addition & 0 deletions pytriton/client/__init__.py
@@ -13,5 +13,6 @@
# limitations under the License.
# noqa: D104

from .client import AsyncioModelClient # noqa: F401
from .client import FuturesModelClient # noqa: F401
from .client import ModelClient # noqa: F401
