PaddlePaddle · westfish · Nov 10, 2022 · Nov 9, 2022 · Nov 10, 2022 · Nov 10, 2022
diff --git a/ppdiffusers/ppdiffusers/__init__.py b/ppdiffusers/ppdiffusers/__init__.py
@@ -44,7 +44,7 @@
         get_scheduler,
     )
     from .pipeline_utils import DiffusionPipeline
-    from .pipelines import DDIMPipeline, DDPMPipeline, KarrasVePipeline, LDMPipeline, PNDMPipeline, ScoreSdeVePipeline
+    from .pipelines import DDIMPipeline, DDPMPipeline, KarrasVePipeline, LDMPipeline, LDMSuperResolutionPipeline, PNDMPipeline, ScoreSdeVePipeline
     from .schedulers import (
         EulerAncestralDiscreteScheduler,
         DDIMScheduler,

diff --git a/ppdiffusers/ppdiffusers/pipelines/__init__.py b/ppdiffusers/ppdiffusers/pipelines/__init__.py
@@ -26,7 +26,7 @@
     from ..utils.dummy_paddle_objects import *  # noqa F403
 
 if is_paddle_available() and is_paddlenlp_available():
-    from .latent_diffusion import LDMTextToImagePipeline, LDMBertModel
+    from .latent_diffusion import LDMTextToImagePipeline, LDMBertModel, LDMSuperResolutionPipeline
     from .stable_diffusion import (StableDiffusionImg2ImgPipeline,
                                    StableDiffusionInpaintPipeline,
                                    StableDiffusionPipeline,

diff --git a/ppdiffusers/ppdiffusers/pipelines/latent_diffusion/__init__.py b/ppdiffusers/ppdiffusers/pipelines/latent_diffusion/__init__.py
@@ -18,3 +18,4 @@
 
 if is_paddlenlp_available():
     from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline
+    from .pipeline_latent_diffusion_superresolution import LDMSuperResolutionPipeline
diff --git a/...users/ppdiffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py b/...users/ppdiffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py
@@ -0,0 +1,174 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import inspect
+from typing import Optional, Tuple, Union
+import numpy as np
+import PIL
+
+import paddle
+import paddle.nn as nn
+from ...models import UNet2DModel, VQModel
+from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ...schedulers import (
+    DDIMScheduler,
+    LMSDiscreteScheduler,
+    PNDMScheduler,
+)
+from paddlenlp.utils.tools import compare_version
+# Pillow 9.1.0 introduced the `PIL.Image.Resampling` enum for the resize filters;
+# older releases expose the same constants (e.g. `LANCZOS`) directly on `PIL.Image`.
+# `Resampling` is the namespace that `preprocess` reads `LANCZOS` from.
+Resampling = (PIL.Image.Resampling if compare_version(PIL.__version__, "9.1.0") >= 0
+              else PIL.Image)
+
+
+def preprocess(image):
+    """Turn a PIL image into a normalized NCHW tensor for the pipeline.
+
+    The image is converted to RGB if needed, resized so that both sides are
+    the largest multiples of 32 not exceeding the original size, scaled from
+    [0, 255] to [-1, 1] and given a leading batch axis.
+
+    Args:
+        image (`PIL.Image.Image`): The low-resolution input image.
+
+    Returns:
+        `paddle.Tensor`: float32 tensor of shape `(1, 3, height, width)` with
+        values in [-1, 1].
+
+    Raises:
+        ValueError: If either side of `image` is smaller than 32 pixels, since
+            rounding down to a multiple of 32 would give an empty image.
+    """
+    # The UNet input reserves exactly 3 channels for the low resolution image,
+    # so grayscale / palette / RGBA inputs must be brought to RGB first
+    # (otherwise the HWC -> CHW transpose or the channel concat fails).
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+
+    w, h = image.size
+    # round each side down to an integer multiple of 32
+    w, h = (side - side % 32 for side in (w, h))
+    if w == 0 or h == 0:
+        raise ValueError(
+            f"`image` must be at least 32x32 pixels but has size {image.size}"
+        )
+
+    image = image.resize((w, h), resample=Resampling.LANCZOS)
+    image = np.array(image).astype(np.float32) / 255.0
+    # add the batch axis, then HWC -> CHW
+    image = image[None].transpose(0, 3, 1, 2)
+    image = paddle.to_tensor(image)
+    # map [0, 1] to [-1, 1]
+    return 2.0 * image - 1.0
+
+
+class LDMSuperResolutionPipeline(DiffusionPipeline):
+    r"""
+    A pipeline for image super-resolution using Latent Diffusion.
+
+    This class inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
+    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
+
+    Parameters:
+        vqvae ([`VQModel`]):
+            Vector-quantized (VQ) VAE Model; this pipeline uses it to decode the denoised latents into images.
+        unet ([`UNet2DModel`]):
+            U-Net architecture that predicts the noise residual from the latents concatenated with the
+            low resolution image.
+        scheduler ([`SchedulerMixin`]):
+            A scheduler to be used in combination with `unet` to denoise the latents. Can be one of
+            [`DDIMScheduler`], [`LMSDiscreteScheduler`], [`PNDMScheduler`].
+    """
+
+    def __init__(
+        self,
+        vqvae: VQModel,
+        unet: UNet2DModel,
+        scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler],
+    ):
+        super().__init__()
+        self.register_modules(vqvae=vqvae, unet=unet, scheduler=scheduler)
+
+    @paddle.no_grad()
+    def __call__(
+        self,
+        init_image: Union[paddle.Tensor, PIL.Image.Image],
+        batch_size: Optional[int] = 1,
+        num_inference_steps: Optional[int] = 100,
+        eta: Optional[float] = 0.0,
+        seed: Optional[int] = None,
+        output_type: Optional[str] = "pil",
+        return_dict: bool = True,
+        **kwargs,
+    ) -> Union[Tuple, ImagePipelineOutput]:
+        r"""
+        Run the super-resolution denoising loop on `init_image`.
+
+        Args:
+            init_image (`paddle.Tensor` or `PIL.Image.Image`):
+                `Image`, or tensor representing an image batch, that will be used as the low resolution
+                conditioning input. A PIL image is passed through `preprocess`; a tensor is used as given.
+            batch_size (`int`, *optional*, defaults to 1):
+                The passed value is overwritten: the batch size is 1 for a PIL image and
+                `init_image.shape[0]` for a tensor.
+            num_inference_steps (`int`, *optional*, defaults to 100):
+                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
+                expense of slower inference.
+            eta (`float`, *optional*, defaults to 0.0):
+                Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. It is only
+                forwarded when the scheduler's `step` method has an `eta` parameter and is ignored otherwise.
+            seed (`int`, *optional*):
+                If not `None`, passed to `paddle.seed()` before the initial noise is drawn with `paddle.randn()`.
+            output_type (`str`, *optional*, defaults to `"pil"`):
+                The output format of the generated image. `"pil"` returns `PIL.Image.Image` objects; any other
+                value returns the `np.array`.
+            return_dict (`bool`, *optional*, defaults to `True`):
+                Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple.
+            kwargs:
+                Accepted but not used by this method.
+
+        Returns:
+            [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipeline_utils.ImagePipelineOutput`] if
+            `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with
+            the generated images.
+
+        Raises:
+            ValueError: If `init_image` is neither a `PIL.Image.Image` nor a `paddle.Tensor`.
+        """
+
+        if isinstance(init_image, PIL.Image.Image):
+            # a single PIL image becomes a batch of one
+            batch_size = 1
+            init_image = preprocess(init_image)
+        elif isinstance(init_image, paddle.Tensor):
+            batch_size = init_image.shape[0]
+        else:
+            raise ValueError(
+                f"`init_image` has to be of type `PIL.Image.Image` or `paddle.Tensor` but is {type(init_image)}"
+            )
+
+        height, width = init_image.shape[-2:]
+
+        # in_channels should be 6: 3 for latents, 3 for low resolution image
+        noise_shape = (batch_size, self.unet.in_channels // 2, height, width)
+        model_dtype = self.unet.dtype
+
+        if seed is not None:
+            paddle.seed(seed)
+        latents = paddle.randn(noise_shape, dtype=model_dtype)
+
+        # the conditioning image must share the UNet dtype to be concatenated with the latents
+        init_image = init_image.astype(model_dtype)
+
+        # set timesteps
+        self.scheduler.set_timesteps(num_inference_steps)
+        timesteps = self.scheduler.timesteps
+
+        # scale the initial noise by the standard deviation required by the scheduler
+        latents = latents * self.scheduler.init_noise_sigma
+
+        # Not all schedulers have the same `step` signature, so `eta` is only forwarded
+        # when the scheduler declares it.
+        # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+        # and should be between [0, 1]
+        step_parameters = inspect.signature(self.scheduler.step).parameters
+        extra_kwargs = {"eta": eta} if "eta" in step_parameters else {}
+
+        for t in self.progress_bar(timesteps):
+            # concat latents and low resolution image in the channel dimension.
+            model_input = paddle.concat([latents, init_image], axis=1)
+            model_input = self.scheduler.scale_model_input(model_input, t)
+            # predict the noise residual
+            noise_pred = self.unet(model_input, t).sample
+            # compute the previous noisy sample x_t -> x_t-1
+            step_output = self.scheduler.step(noise_pred, t, latents,
+                                              **extra_kwargs)
+            latents = step_output.prev_sample
+
+        # decode the image latents with the VQVAE
+        decoded = self.vqvae.decode(latents).sample
+        # clamp to [-1, 1], then map to [0, 1]
+        decoded = paddle.clip(decoded, -1.0, 1.0) / 2 + 0.5
+        # NCHW -> NHWC numpy array
+        images = decoded.transpose([0, 2, 3, 1]).numpy()
+
+        if output_type == "pil":
+            images = self.numpy_to_pil(images)
+
+        if return_dict:
+            return ImagePipelineOutput(images=images)
+
+        return (images, )
diff --git a/ppdiffusers/ppdiffusers/utils/dummy_paddle_objects.py b/ppdiffusers/ppdiffusers/utils/dummy_paddle_objects.py
@@ -198,6 +198,21 @@ def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["paddle"])
 
 
+# Placeholder with the same name as the real `LDMSuperResolutionPipeline`.
+# Every entry point below only forwards to `requires_backends` with the
+# "paddle" backend and does nothing else.
+# NOTE(review): presumably `requires_backends` raises a helpful error when
+# paddle is not installed, and this class is what gets imported in that case
+# - confirm against `requires_backends` and the package `__init__`.
+class LDMSuperResolutionPipeline(metaclass=DummyObject):
+    # backends this placeholder stands in for
+    _backends = ["paddle"]
+
+    # instantiation: check the backend instead of building a pipeline
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["paddle"])
+
+    # same backend check for the `from_config` constructor
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["paddle"])
+
+    # same backend check for the `from_pretrained` constructor
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["paddle"])
+
+
 class PNDMPipeline(metaclass=DummyObject):
     _backends = ["paddle"]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -18,3 +18,4 @@

		if is_paddlenlp_available():
		from .pipeline_latent_diffusion import LDMBertModel, LDMTextToImagePipeline
		from .pipeline_latent_diffusion_superresolution import LDMSuperResolutionPipeline