PaddlePaddle · guoshengCS · Aug 10, 2022 · Aug 8, 2022 · Aug 8, 2022 · Aug 9, 2022
diff --git a/docs/model_zoo/taskflow.md b/docs/model_zoo/taskflow.md
@@ -41,6 +41,7 @@ PaddleNLP提供**开箱即用**的产业级NLP预置任务能力，无需训练
 | [智能写诗](#智能写诗) | `Taskflow("poetry_generation")` | ✅ | ✅ | ✅ | | | 使用最大中文开源CPM模型完成写诗 |
 | [开放域对话](#开放域对话) | `Taskflow("dialogue")` | ✅ | ✅ | ✅ | | | 十亿级语料训练最强中文闲聊模型PLATO-Mini，支持多轮对话 |
 | [代码生成](#代码生成) | `Taskflow("code_generation")` | ✅ | ✅ | ✅ | | | 代码生成大模型 |
+| [文图生成](#文图生成) | `Taskflow("text2image_generation")` | ✅ | ✅ | ✅ | | | 文图生成大模型 |
 
 
 ## QuickStart
@@ -1324,6 +1325,40 @@ from paddlenlp import Taskflow
 * `output_scores`：是否要输出解码得分，请默认为False。
 </div></details>
 
+### 文图生成
+<details><summary>&emsp; 通过文图生成模型来生成图片 </summary><div>
+
+#### 支持单条、批量预测
+
+```python
+>>> from paddlenlp import Taskflow
+# 默认模型为 pai-painter-painting-base-zh
+>>> text2imagegen = Taskflow("text2image_generation", model="pai-painter-painting-base-zh")
+# 单条输入
+>>> images = text2imagegen("风阁水帘今在眼，且来先看早梅红")
+# [<PIL.Image.Image image mode=RGB size=1024x256>]
+>>> images[0].save("figure.png")
+# 多条输入
+>>> images = text2imagegen(["风阁水帘今在眼，且来先看早梅红", "见说春风偏有贺，露花千朵照庭闹"])
+# [<PIL.Image.Image image mode=RGB size=1024x256>,
+# <PIL.Image.Image image mode=RGB size=1024x256>]
+>>> for i, image in enumerate(images):
+...     image.save(f"figure_{i}.png")
+```
+
+#### 可配置参数说明
+* `model`：可选模型，默认为`pai-painter-painting-base-zh`，支持的模型有`["pai-painter-painting-base-zh", "pai-painter-scenery-base-zh", "pai-painter-commercial-base-zh", "dalle-mini", "dalle-mega-v16", "dalle-mega"]`。
+* `batch_size`：批处理大小，请结合机器情况进行调整，默认为1。
+* `temperature`：解码参数temperature，默认为1.0。
+* `top_k`：解码参数top_k，默认为32。
+* `top_p`：解码参数top_p，默认为1.0。
+* `condition_scale`：dalle-mini模型使用的参数，可参考[推特](https://twitter.com/RiversHaveWings/status/1478093658716966912)，默认为10.0。
+* `num_return_images`：返回图片的数量，默认为4，即4张图片水平拼接形成一张长图。
+* `use_faster`：是否使用faster_generation，默认为False，目前artist模型支持，而dalle-mini模型不支持。
+* `use_fp16_decoding`：是否使用fp16加速解码过程，默认为False，只有当use_faster为True的时候才有效。
+
+</div></details>
+
 ## PART Ⅱ &emsp; 定制化训练
 
 <details><summary>适配任务列表</summary><div>

diff --git a/paddlenlp/taskflow/taskflow.py b/paddlenlp/taskflow/taskflow.py
@@ -37,6 +37,7 @@
 from .dialogue import DialogueTask
 from .information_extraction import UIETask
 from .code_generation import CodeGenerationTask
+from .text2image_generation import Text2ImageGenerationTask
 
 warnings.simplefilter(action='ignore', category=Warning, lineno=0, append=False)
 
@@ -317,6 +318,46 @@
  },
  "default": {
  "model": "Salesforce/codegen-350M-mono",
+ },
+ },
+ "text2image_generation": {
+ "models": {
+ "dalle-mini": {
+ "task_class": Text2ImageGenerationTask,
+ "task_flag": "text2image_generation-dalle-mini",
+ "task_priority_path": "dalle-mini",
+ },
+ "dalle-mega-v16": {
+ "task_class": Text2ImageGenerationTask,
+ "task_flag": "text2image_generation-dalle-mega-v16",
+ "task_priority_path": "dalle-mega-v16",
+ },
+ "dalle-mega": {
+ "task_class": Text2ImageGenerationTask,
+ "task_flag": "text2image_generation-dalle-mega",
+ "task_priority_path": "dalle-mega",
+ },
+ "pai-painter-painting-base-zh": {
+ "task_class": Text2ImageGenerationTask,
+ "task_flag":
+ "text2image_generation-pai-painter-painting-base-zh",
+ "task_priority_path": "pai-painter-painting-base-zh",
+ },
+ "pai-painter-scenery-base-zh": {
+ "task_class": Text2ImageGenerationTask,
+ "task_flag":
+ "text2image_generation-pai-painter-scenery-base-zh",
+ "task_priority_path": "pai-painter-scenery-base-zh",
+ },
+ "pai-painter-commercial-base-zh": {
+ "task_class": Text2ImageGenerationTask,
+ "task_flag":
+ "text2image_generation-pai-painter-commercial-base-zh",
+ "task_priority_path": "pai-painter-commercial-base-zh",
+ },
+ },
+ "default": {
+ "model": "pai-painter-painting-base-zh",
  }
  }
 }

diff --git a/paddlenlp/taskflow/text2image_generation.py b/paddlenlp/taskflow/text2image_generation.py
@@ -0,0 +1,161 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle
+from PIL import Image
+from ..transformers import AutoModelForImageGeneration, AutoTokenizer
+from .task import Task
+
+# Minimal example of calling this task through `Taskflow`.
+# NOTE(review): `usage` is not referenced anywhere in the visible code;
+# presumably the Taskflow framework reads it for help output -- confirm.
+usage = r"""
+ from paddlenlp import Taskflow 
+
+ text2imagegen = Taskflow("text2image_generation")
+ images = text2imagegen("风阁水帘今在眼，且来先看早梅红")
+ images[0].save("figure.png")
+
+ """
+
+# Extra keyword arguments passed to the tokenizer call, keyed by the model's
+# `base_model_prefix` (looked up in `Text2ImageGenerationTask._batchify`).
+# The option name is `return_token_type_ids`; the earlier spelling
+# `return_token_typd_ids` did not match it, so the intended `False` was never
+# applied under the right name.
+tokenizer_kwargs = {
+    "dallebart": {
+        "max_length": 64,
+        "return_token_type_ids": False,
+        "return_attention_mask": True,
+    },
+    "gpt": {
+        "max_length": 32,
+        "return_token_type_ids": False,
+        "return_attention_mask": False,
+    },
+}
+
+
+class Text2ImageGenerationTask(Task):
+    """
+    Taskflow task that turns text prompts into PIL images.
+
+    Args:
+        task(string): The name of task.
+        model(string): The model name in the task.
+        kwargs (dict, optional): Extra settings, also forwarded to the base task.
+            The keys read by this class, with their defaults, are
+            `batch_size` (1), `temperature` (1.0), `top_k` (32), `top_p` (1.0),
+            `condition_scale` (10.0), `num_return_images` (4),
+            `use_faster` (False) and `use_fp16_decoding` (False).
+    """
+
+    def __init__(self, task, model="pai-painter-painting-base-zh", **kwargs):
+        super().__init__(task=task, model=model, **kwargs)
+        # Every option below is stored on the instance as `_<option name>`.
+        option_defaults = (
+            ("batch_size", 1),
+            ("temperature", 1.0),
+            ("top_k", 32),
+            ("top_p", 1.0),
+            ("condition_scale", 10.0),
+            ("num_return_images", 4),
+            ("use_faster", False),
+            ("use_fp16_decoding", False),
+        )
+        for option, fallback in option_defaults:
+            setattr(self, "_" + option, kwargs.get(option, fallback))
+        # The tokenizer is built before the model, as in the original order.
+        self._construct_tokenizer(model)
+        self._construct_model(model)
+
+    def _construct_model(self, model):
+        """
+        Load the pretrained image generation model and switch it to eval mode.
+        """
+        network = AutoModelForImageGeneration.from_pretrained(model)
+        self._model = network
+        network.eval()
+
+    def _construct_tokenizer(self, model):
+        """
+        Load the pretrained tokenizer that matches `model`.
+        """
+        tokenizer = AutoTokenizer.from_pretrained(model)
+        self._tokenizer = tokenizer
+
+    def _batchify(self, data, batch_size):
+        """
+        Lazily yield tokenized batches holding up to `batch_size` texts each.
+        """
+
+        def _encode(texts):
+            # Tokenizer options depend on the model's `base_model_prefix`.
+            prefix = self._model.base_model_prefix
+            encoded = self._tokenizer(texts,
+                                      return_tensors="pd",
+                                      padding="max_length",
+                                      truncation=True,
+                                      **tokenizer_kwargs[prefix])
+            if prefix == "dallebart":
+                # Travels with the batch so it reaches `generate` as a keyword.
+                encoded["condition_scale"] = self._condition_scale
+            return encoded
+
+        # Collect texts until a batch is full, then emit it; a trailing
+        # partial batch is emitted once the data is exhausted.
+        pending = []
+        for text in data:
+            pending.append(text)
+            if len(pending) != batch_size:
+                continue
+            yield _encode(pending)
+            pending = []
+        if pending:
+            yield _encode(pending)
+
+    def _preprocess(self, inputs):
+        """
+        Validate the raw text and pair it with a generator of tokenized batches.
+
+        Returns a dict with the batch generator under 'batches' and the checked
+        input text under 'text'.
+        """
+        texts = self._check_input_text(inputs)
+        return {
+            'batches': self._batchify(texts, self._batch_size),
+            'text': texts,
+        }
+
+    def _run_model(self, inputs):
+        """
+        Generate images for every batch produced by `_preprocess`.
+
+        The generated arrays are converted to uint8 when the model's
+        `base_model_prefix` is "dallebart" or "gpt", then stored one per input
+        text under inputs['images'].
+        """
+        decode_options = dict(
+            temperature=self._temperature,
+            top_k=self._top_k,
+            top_p=self._top_p,
+            num_return_sequences=self._num_return_images,
+            use_faster=self._use_faster,
+            use_fp16_decoding=self._use_fp16_decoding,
+        )
+        collected = []
+        for batch in inputs["batches"]:
+            generated = self._model.generate(**batch, **decode_options)
+            pixels = generated.cpu().numpy()
+            prefix = self._model.base_model_prefix
+            if prefix == "dallebart":
+                # Values are clipped to [0, 1] before scaling to 0..255.
+                pixels = (pixels.clip(0, 1) * 255).astype("uint8")
+            elif prefix == "gpt":
+                # Values are shifted by 1 and scaled by 127.5, then clipped.
+                pixels = ((pixels + 1.0) * 127.5).clip(0, 255).astype("uint8")
+            # One entry per input text in the batch.
+            collected.extend(pixels)
+        inputs['images'] = collected
+        return inputs
+
+    def _postprocess(self, inputs):
+        """
+        Convert each generated array into a single PIL Image.
+
+        Each array is transposed with axes [1, 0, 2, 3] and reshaped so that its
+        `num_return_images` pictures sit side by side along the second axis.
+        """
+        count = self._num_return_images
+        pictures = []
+        for group in inputs['images']:
+            # assumes `group` is (num_return_images, height, width, channels)
+            # -- TODO confirm against the model's `generate` output.
+            height, width, channels = group.shape[-3:]
+            strip = group.transpose([1, 0, 2, 3]).reshape(
+                height, count * width, channels)
+            pictures.append(Image.fromarray(strip))
+        return pictures
+
+    def _construct_input_spec(self):
+        """
+        Declare the input spec used to convert the dygraph model to a static one.
+        """
+        ids_spec = paddle.static.InputSpec(shape=[None, None],
+                                           dtype="int64",
+                                           name='input_ids')
+        self._input_spec = [ids_spec]