From 6f79391db49ddcbfef53bc82a437347d71da796e Mon Sep 17 00:00:00 2001 From: lycheel1 Date: Wed, 10 Apr 2024 21:07:27 +0000 Subject: [PATCH 1/3] docs: init io-descriptors example rebase to main --- examples/io-descriptors/README.md | 120 +++++++++++++++++++++++ examples/io-descriptors/bentofile.yaml | 3 + examples/io-descriptors/requirements.txt | 7 ++ examples/io-descriptors/service.py | 108 ++++++++++++++++++++ 4 files changed, 238 insertions(+) create mode 100644 examples/io-descriptors/README.md create mode 100644 examples/io-descriptors/bentofile.yaml create mode 100644 examples/io-descriptors/requirements.txt create mode 100644 examples/io-descriptors/service.py diff --git a/examples/io-descriptors/README.md b/examples/io-descriptors/README.md new file mode 100644 index 00000000000..7f6c3d856ab --- /dev/null +++ b/examples/io-descriptors/README.md @@ -0,0 +1,120 @@ +# BentoML Input/Output Types Tutorial + +A BentoML service supports a wide range of data types when creating a Service API. The data types can be catagorized as follows: +- Python Standards: `str`, `int`, `float`, `list`, `dict` etc. +- Pydantic field types: see [Pydantic types documentation](https://field-idempotency--pydantic-docs.netlify.app/usage/types/). +- ML specific types: `nummpy.ndarray`, `torch.Tensor` , `tf.Tensor` for tensor data, `pd.DataFrame` for tabular data, `PIL.Image.Image` for +Image data, and `pathlib.Path` for files such as audios, images, and pdfs. + +When creating a Bentoml service, you should use Python's type annotations to define the expected input and output types for each API endpoint. This +step can not only helps validate the data against the specified schema, but also enhances the clarity and readability of your code. Type annotations play +an important role in generating the BentoML API, client, and service UI components, ensuring a consitent and predictable interaction with the Service. 
+ +You can also use `pydantic.Field` to set additional information about servicee parameters, such as default values and descriptions. This improves the API's +usability and provides basic documentation. See the following examples for details. + +## Running this example + +Let's start with the environment. We recommend using virtual environment for better package handling. + +```bash +python -m venv io-descriptors-example +source io-descriptors-example/bin/activate +pip install -r requirements.txt +``` + +7 different API services are implemented within this example, with diversed input/output types. When running, you should specified the class name of the service +you'd like to run inside `bentofile.yaml`. + +```yaml +service: "service.py:AudioSpeedUp" +include: + - "service.py" +``` + +In the above configuration through `bentofile.yaml`, we're running the `AudioSpeedUp` service, which you can find on line 62 of `service.py`. When running a different +service, simply replace `AudioSpeedUp` with the class name of the service. + +For example, if you want to run the first service `ImageResize`, you can configure the `bentofile.yaml` as follows: + +```yaml +service: "service.py:ImageResize" +include: + - "service.py" +``` + +After you finished configuring `bentofile.yaml`, run `bentoml serve .` to deploy the service locally. You can then interact with the auto-generated swagger UI to play +around with each different API endpoints. + +## Different data types + +### Standard Python types + +The following demonstrates a simple addtion service, with both inputs and output as float parameters. You can +obviously change the type annotation to `int`, `str` etc. 
to get familiar with the interaction between type +annotaions and the auto-generated Swagger UI when deploying locally.\ + +```python +@bentoml.service() +class AdditionService: + + @bentoml.api() + def add(self, num1: float, num2: float) -> float: + return num1 + num2 +``` + +### Files + +Files are handled through `pathlib.Path` in BentoML (which means you should handle the file as a file path in your API implementation as well as on the client side). +Most file types can be specified through `bentoml.validators.Contentype()`. The input of this function follows the standard of the +request format (such as `text/plain`, `application/pdf`, `audio/mp3` etc.). + +##### Appending Strings to File example +```python +@bentoml.service() +class AppendStringToFile: + + @bentoml.api() + def append_string_to_eof( + self, + txt_file: t.Annotated[Path, bentoml.validators.ContentType("text/plain")], input_string: str + ) -> t.Annotated[Path, bentoml.validators.ContentType("text/plain")]: + with open(txt_file, "a") as file: + file.write(input_string) + return txt_file +``` + +Within `service.py`, example API services with 4 different file types are implemented (audio, image, text file, and pdf file). The functionality of each service +is quite simple and self-explainatory. + +Notice that for class `ImageResize`, two different API endpoints are implemented. This is because BentoML can support images parameters directly through +`PIL.Image.Image`, which means that image objects can be directly passed through clients, instead of a file object. + +The last two services are examples of having `numpy.ndarray` or `pandas.DataFrame` as input parameters. Since they all work quite similarly with the above examples, +we will not specifically explain them in this tutorial. You can try to write revise the service with `torch.Tensor` as input to check your understanding. + +To serve the these examples locally, run `bentoml serve .` + +```bash +bentoml serve . 
+2024-03-22T19:25:24+0000 [INFO] [cli] Starting production HTTP BentoServer from "service:ImageResize" listening on http://localhost:3000 (Press CTRL+C to quit) +``` + +Open your web browser at http://0.0.0.0:3000 to view the Swagger UI for sending test requests. + +You may also send request with `curl` command or any HTTP client, e.g.: + +```bash +curl -X 'POST' \ + 'http://0.0.0.0:3000/classify_generated_texts' \ + -H 'accept: application/json' \ + -H 'Content-Type: text/plain' \ + -d 'I have an idea!' +``` + +## Deploy to BentoCloud +Run the following command to deploy this example to BentoCloud for better management and scalability. [Sign up](https://www.bentoml.com/) if you haven't got a BentoCloud account. +```bash +bentoml deploy . +``` +For more information, see [Create Deployments](https://docs.bentoml.com/en/latest/bentocloud/how-tos/create-deployments.html). diff --git a/examples/io-descriptors/bentofile.yaml b/examples/io-descriptors/bentofile.yaml new file mode 100644 index 00000000000..fb451b42d60 --- /dev/null +++ b/examples/io-descriptors/bentofile.yaml @@ -0,0 +1,3 @@ +service: "service.py:AudioSpeedUp" +include: + - "service.py" diff --git a/examples/io-descriptors/requirements.txt b/examples/io-descriptors/requirements.txt new file mode 100644 index 00000000000..ce8c440d8a2 --- /dev/null +++ b/examples/io-descriptors/requirements.txt @@ -0,0 +1,7 @@ +diffusers +bentoml +transformers +torch +accelerate +pydub +pdf2img diff --git a/examples/io-descriptors/service.py b/examples/io-descriptors/service.py new file mode 100644 index 00000000000..217dbc3c4d9 --- /dev/null +++ b/examples/io-descriptors/service.py @@ -0,0 +1,108 @@ +import typing as t +from pathlib import Path + +import numpy as np +import pandas as pd +import torch +from PIL import Image as im +from PIL.Image import Image +from pydantic import Field + +import bentoml +from bentoml.validators import DataframeSchema +from bentoml.validators import DType + +# PROMPT_TEMPLATE = """\nUSER: 
What's the content of the image?\nASSISTANT:""" + + +@bentoml.service() +class ImageResize: + @bentoml.api() + def generate(self, image: Image, height: str = "64", width: str = "64") -> Image: + size = int(height), int(width) + return image.resize(size, im.ANTIALIAS) + + @bentoml.api() + def generate_with_path( + self, + image: t.Annotated[Path, bentoml.validators.ContentType("image/jpeg")], + height: str = "64", + width: str = "64", + ) -> Image: + size = int(height), int(width) + image = im.open(image) + return image.resize(size, im.ANTIALIAS) + + +@bentoml.service() +class AdditionService: + @bentoml.api() + def add(self, num1: float, num2: float) -> float: + return num1 + num2 + + +@bentoml.service() +class AppendStringToFile: + @bentoml.api() + def append_string_to_eof( + self, + txt_file: t.Annotated[Path, bentoml.validators.ContentType("text/plain")], + input_string: str, + ) -> t.Annotated[Path, bentoml.validators.ContentType("text/plain")]: + with open(txt_file, "a") as file: + file.write(input_string) + return txt_file + + +@bentoml.service() +class PDFtoImage: + @bentoml.api() + def pdf_first_page_as_image( + self, + pdf: t.Annotated[Path, bentoml.validators.ContentType("application/pdf")], + ) -> Image: + from pdf2image import convert_from_path + + pages = convert_from_path(pdf) + return pages[0].resize(pages[0].size, im.LANCZOS) + + +@bentoml.service() +class AudioSpeedUp: + @bentoml.api() + def speed_up_audio( + self, + audio: t.Annotated[Path, bentoml.validators.ContentType("audio/mpeg")], + velocity: float, + ) -> t.Annotated[Path, bentoml.validators.ContentType("audio/mp3")]: + from pydub import AudioSegment + + sound = AudioSegment.from_file(audio) # type: + sound = sound.speedup(velocity) + sound.export("output.mp3", format="mp3") + return Path("output.mp3") + + +@bentoml.service() +class TransposeTensor: + @bentoml.api() + def transpose( + self, + tensor: t.Annotated[torch.Tensor, DType("float32")] = Field( + description="A 2x4 tensor with 
float32 dtype" + ), + ) -> np.ndarray: + return torch.transpose(tensor, 0, 1).numpy() + + +@bentoml.service() +class CountRowsDF: + @bentoml.api() + def count_rows( + self, + input: t.Annotated[ + pd.DataFrame, + DataframeSchema(orient="records", columns=["dummy1", "dummy2"]), + ], + ) -> int: + return len(input) From 421942ebeb5e2e3540feec981f48f625b75a4b54 Mon Sep 17 00:00:00 2001 From: lycheel1 Date: Fri, 26 Apr 2024 15:53:37 +0000 Subject: [PATCH 2/3] examples: update io-descriptor --- examples/io-descriptors/README.md | 51 ++++++++++++++---------- examples/io-descriptors/requirements.txt | 1 + examples/io-descriptors/service.py | 31 +++++++------- 3 files changed, 47 insertions(+), 36 deletions(-) diff --git a/examples/io-descriptors/README.md b/examples/io-descriptors/README.md index 7f6c3d856ab..1bb3955e052 100644 --- a/examples/io-descriptors/README.md +++ b/examples/io-descriptors/README.md @@ -1,19 +1,21 @@ # BentoML Input/Output Types Tutorial -A BentoML service supports a wide range of data types when creating a Service API. The data types can be catagorized as follows: +BentoML supports a wide range of data types when creating a Service API. The data types can be catagorized as follows: - Python Standards: `str`, `int`, `float`, `list`, `dict` etc. - Pydantic field types: see [Pydantic types documentation](https://field-idempotency--pydantic-docs.netlify.app/usage/types/). - ML specific types: `nummpy.ndarray`, `torch.Tensor` , `tf.Tensor` for tensor data, `pd.DataFrame` for tabular data, `PIL.Image.Image` for Image data, and `pathlib.Path` for files such as audios, images, and pdfs. -When creating a Bentoml service, you should use Python's type annotations to define the expected input and output types for each API endpoint. This -step can not only helps validate the data against the specified schema, but also enhances the clarity and readability of your code. 
Type annotations play -an important role in generating the BentoML API, client, and service UI components, ensuring a consitent and predictable interaction with the Service. +When creating a BentoML Service, you should use Python's type annotations to define the expected input and output types for each API endpoint. This +step can not only help validate the data against the specified schema, but also enhance the clarity and readability of your code. Type annotations play +an important role in generating the BentoML API, client, and Service UI components, ensuring a consistent and predictable interaction with the Service. -You can also use `pydantic.Field` to set additional information about servicee parameters, such as default values and descriptions. This improves the API's -usability and provides basic documentation. See the following examples for details. +You can also use `pydantic.Field` to set additional information about service parameters, such as default values and descriptions. This improves the API's +usability and provides basic documentation. -## Running this example +In this tutorial, you will learn how to set different input and output types for BentoML Services. + +## Installing Dependencies Let's start with the environment. We recommend using virtual environment for better package handling. @@ -23,7 +25,8 @@ source io-descriptors-example/bin/activate pip install -r requirements.txt ``` -7 different API services are implemented within this example, with diversed input/output types. When running, you should specified the class name of the service +## Running a Service +7 different API Services are implemented in `service.py`, with diverse input/output types. When running, you should specify the class name of the Service you'd like to run inside `bentofile.yaml`. 
```yaml @@ -32,10 +35,10 @@ include: - "service.py" ``` -In the above configuration through `bentofile.yaml`, we're running the `AudioSpeedUp` service, which you can find on line 62 of `service.py`. When running a different -service, simply replace `AudioSpeedUp` with the class name of the service. +In the above configuration through `bentofile.yaml`, we're running the `AudioSpeedUp` Service, which is defined in `service.py`. When running a different +Service, simply replace `AudioSpeedUp` with the class name of the Service. -For example, if you want to run the first service `ImageResize`, you can configure the `bentofile.yaml` as follows: +For example, if you want to run the first Service `ImageResize`, you can configure the `bentofile.yaml` as follows: ```yaml service: "service.py:ImageResize" @@ -43,14 +46,14 @@ include: - "service.py" ``` -After you finished configuring `bentofile.yaml`, run `bentoml serve .` to deploy the service locally. You can then interact with the auto-generated swagger UI to play +After you finish configuring `bentofile.yaml`, run `bentoml serve .` to deploy the Service locally. You can then interact with the auto-generated Swagger UI to play around with each different API endpoints. ## Different data types ### Standard Python types -The following demonstrates a simple addtion service, with both inputs and output as float parameters. You can +The following demonstrates a simple addition Service, with both inputs and output as float parameters. You can obviously change the type annotation to `int`, `str` etc. to get familiar with the interaction between type annotaions and the auto-generated Swagger UI when deploying locally.\ @@ -84,19 +87,20 @@ class AppendStringToFile: return txt_file ``` -Within `service.py`, example API services with 4 different file types are implemented (audio, image, text file, and pdf file). The functionality of each service -is quite simple and self-explainatory. 
+Within `service.py`, example API Services with 4 different file types are implemented (audio, image, text file, and pdf file). The functionality of each Service +is quite simple and self-explanatory. Notice that for class `ImageResize`, two different API endpoints are implemented. This is because BentoML can support images parameters directly through `PIL.Image.Image`, which means that image objects can be directly passed through clients, instead of a file object. -The last two services are examples of having `numpy.ndarray` or `pandas.DataFrame` as input parameters. Since they all work quite similarly with the above examples, -we will not specifically explain them in this tutorial. You can try to write revise the service with `torch.Tensor` as input to check your understanding. +The last two Services are examples of having `torch.Tensor` or `pandas.DataFrame` as input parameters. Since they all work quite similarly to the above examples, +we will not specifically explain them in this tutorial. You can try to revise the Service with `numpy.ndarray` as input to check your understanding. To serve the these examples locally, run `bentoml serve .` ```bash -bentoml serve . +$ bentoml serve . + 2024-03-22T19:25:24+0000 [INFO] [cli] Starting production HTTP BentoServer from "service:ImageResize" listening on http://localhost:3000 (Press CTRL+C to quit) ``` @@ -106,10 +110,15 @@ You may also send request with `curl` command or any HTTP client, e.g.: ```bash curl -X 'POST' \ - 'http://0.0.0.0:3000/classify_generated_texts' \ + 'http://localhost:3000/transpose' \ -H 'accept: application/json' \ - -H 'Content-Type: text/plain' \ - -d 'I have an idea!' 
+ -H 'Content-Type: application/json' \ + -d '{ + "tensor": [ + [0, 1, 2, 3], + [4, 5, 6, 7] + ] +}' ``` ## Deploy to BentoCloud diff --git a/examples/io-descriptors/requirements.txt b/examples/io-descriptors/requirements.txt index ce8c440d8a2..1ed516b9d15 100644 --- a/examples/io-descriptors/requirements.txt +++ b/examples/io-descriptors/requirements.txt @@ -5,3 +5,4 @@ torch accelerate pydub pdf2img +pandas \ No newline at end of file diff --git a/examples/io-descriptors/service.py b/examples/io-descriptors/service.py index 217dbc3c4d9..6d2f74a276a 100644 --- a/examples/io-descriptors/service.py +++ b/examples/io-descriptors/service.py @@ -12,26 +12,24 @@ from bentoml.validators import DataframeSchema from bentoml.validators import DType -# PROMPT_TEMPLATE = """\nUSER: What's the content of the image?\nASSISTANT:""" - @bentoml.service() class ImageResize: @bentoml.api() - def generate(self, image: Image, height: str = "64", width: str = "64") -> Image: - size = int(height), int(width) - return image.resize(size, im.ANTIALIAS) + def generate(self, image: Image, height: int = 64, width: int = 64) -> Image: + size = width, height + return image.resize(size, im.LANCZOS) @bentoml.api() def generate_with_path( self, image: t.Annotated[Path, bentoml.validators.ContentType("image/jpeg")], - height: str = "64", - width: str = "64", + height: int = 64, + width: int = 64, ) -> Image: - size = int(height), int(width) + size = width, height image = im.open(image) - return image.resize(size, im.ANTIALIAS) + return image.resize(size, im.LANCZOS) @bentoml.service() @@ -46,12 +44,13 @@ class AppendStringToFile: @bentoml.api() def append_string_to_eof( self, + context: bentoml.Context, txt_file: t.Annotated[Path, bentoml.validators.ContentType("text/plain")], input_string: str, ) -> t.Annotated[Path, bentoml.validators.ContentType("text/plain")]: - with open(txt_file, "a") as file: + with open(txt_file, "a") as file: file.write(input_string) - return txt_file + return 
txt_file @bentoml.service() @@ -72,15 +71,17 @@ class AudioSpeedUp: @bentoml.api() def speed_up_audio( self, + context: bentoml.Context, audio: t.Annotated[Path, bentoml.validators.ContentType("audio/mpeg")], velocity: float, ) -> t.Annotated[Path, bentoml.validators.ContentType("audio/mp3")]: from pydub import AudioSegment - - sound = AudioSegment.from_file(audio) # type: + import os + output_path = os.path.join(context.temp_dir, "output.mp3") + sound = AudioSegment.from_file(audio) sound = sound.speedup(velocity) - sound.export("output.mp3", format="mp3") - return Path("output.mp3") + sound.export(output_path, format="mp3") + return Path(output_path) @bentoml.service() From 1968d5bfbc21216dace64365aa21055a2593484a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Apr 2024 15:56:00 +0000 Subject: [PATCH 3/3] ci: auto fixes from pre-commit.ci For more information, see https://pre-commit.ci --- examples/io-descriptors/requirements.txt | 2 +- examples/io-descriptors/service.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/io-descriptors/requirements.txt b/examples/io-descriptors/requirements.txt index 1ed516b9d15..555c2926121 100644 --- a/examples/io-descriptors/requirements.txt +++ b/examples/io-descriptors/requirements.txt @@ -5,4 +5,4 @@ torch accelerate pydub pdf2img -pandas \ No newline at end of file +pandas diff --git a/examples/io-descriptors/service.py b/examples/io-descriptors/service.py index 6d2f74a276a..0790366c4e5 100644 --- a/examples/io-descriptors/service.py +++ b/examples/io-descriptors/service.py @@ -75,10 +75,12 @@ def speed_up_audio( audio: t.Annotated[Path, bentoml.validators.ContentType("audio/mpeg")], velocity: float, ) -> t.Annotated[Path, bentoml.validators.ContentType("audio/mp3")]: - from pydub import AudioSegment import os + + from pydub import AudioSegment + output_path = os.path.join(context.temp_dir, "output.mp3") - sound = 
AudioSegment.from_file(audio) + sound = AudioSegment.from_file(audio) sound = sound.speedup(velocity) sound.export(output_path, format="mp3") return Path(output_path)