Merge pull request PaddlePaddle#39 from westfish/remove_literal
lvdm delete redundant imports
westfish authored Aug 2, 2023
2 parents de634ce + 7cdece0 commit 373cfd2
Showing 15 changed files with 7 additions and 62 deletions.
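The deletions below are classic dead imports, the kind pyflakes/flake8 reports as F401 ("imported but unused"). As an aside, here is a minimal AST-based sketch of how such imports can be detected; it is illustrative only, not part of this commit, and unused_imports is a hypothetical helper:

import ast
import sys


def unused_imports(source: str):
    """Return (line, name) pairs for imports whose bound name is never used.

    A rough sketch in the spirit of flake8's F401 check: it assumes the
    source parses and ignores edge cases such as __all__ re-exports or
    names referenced only inside string annotations.
    """
    tree = ast.parse(source)
    imported = {}  # bound name -> line number of the import
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                # 'import paddle.nn as nn' binds "nn"; 'import os' binds "os"
                imported[alias.asname or alias.name.split(".")[0]] = node.lineno
        elif isinstance(node, ast.ImportFrom):
            for alias in node.names:
                if alias.name != "*":
                    imported[alias.asname or alias.name] = node.lineno
    # An attribute chain like paddle.nn.Layer still yields a Name node for
    # its leftmost part, so plain module usage is counted as well.
    used = {n.id for n in ast.walk(tree) if isinstance(n, ast.Name)}
    return sorted((line, name) for name, line in imported.items()
                  if name not in used)


if __name__ == "__main__":
    path = sys.argv[1]
    with open(path) as f:
        for line, name in unused_imports(f.read()):
            print(f"{path}:{line}: '{name}' imported but unused")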
@@ -1,5 +1,4 @@
import paddle
-import warnings


def _is_tensor_video_clip(clip):
@@ -1,7 +1,6 @@
import paddle
import numbers
import random
-import warnings
from . import _functional_video as F


1 change: 0 additions & 1 deletion ppdiffusers/examples/text_to_video_lvdm/lvdm/lvdm_model.py
@@ -22,7 +22,6 @@
import numpy as np
import paddle
import paddle.nn as nn
-import paddle.nn.functional as F

from paddlenlp.transformers import AutoTokenizer, CLIPTextModel
from paddlenlp.utils.log import logger
6 changes: 1 addition & 5 deletions ppdiffusers/examples/text_to_video_lvdm/lvdm/lvdm_trainer.py
@@ -13,7 +13,6 @@
# limitations under the License.

import contextlib
-import os
import sys

import paddle
@@ -25,11 +24,8 @@
INTEGRATION_TO_CALLBACK,
VisualDLCallback,
rewrite_logs, )
-from paddlenlp.trainer.utils.helper import ( # nested_truncate,
-distributed_concat, nested_concat, nested_detach, nested_numpify,
-nested_truncate, )
+from paddlenlp.trainer.utils.helper import nested_detach
from paddlenlp.utils.log import logger
from ppdiffusers.training_utils import unwrap_model

from .frame_dataset import VideoFrameDataset
from .webvid_dataset import WebVidDataset
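Of the helpers previously imported here, only nested_detach remains, presumably because it is the only one this trainer still calls. A hedged usage sketch of what it does, assuming the paddlenlp helper mirrors the Hugging Face original and recurses over nested lists/tuples of tensors:

import paddle
from paddlenlp.trainer.utils.helper import nested_detach

# nested_detach detaches every tensor inside a (possibly nested) list or
# tuple, which is what a prediction step needs before gathering outputs.
x = paddle.randn([2, 4])
x.stop_gradient = False            # pretend x came out of the model
outputs = (x, [paddle.randn([2])])

detached = nested_detach(outputs)
assert detached[0].stop_gradient   # the detached copy no longer tracks grads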
@@ -1,6 +1,3 @@
-import numpy as np
-import os
-from PIL import Image
import paddle
from ppdiffusers import LVDMUncondPipeline

@@ -1,6 +1,3 @@
-import numpy as np
-import os
-from PIL import Image
import paddle
from ppdiffusers import LVDMTextToVideoPipeline

Original file line number Diff line number Diff line change
@@ -21,8 +21,6 @@
import paddle
import torch

-from paddlenlp.utils.downloader import get_path_from_url
-
try:
from omegaconf import OmegaConf
except ImportError:
@@ -20,8 +20,6 @@
import paddle
import torch

-from paddlenlp.utils.downloader import get_path_from_url
-
try:
from omegaconf import OmegaConf
except ImportError:
@@ -19,7 +19,6 @@
from lvdm import (
LatentVideoDiffusion,
LatentVideoDiffusionTrainer,
-VideoFrameDataset,
WebVidDataset, )
from lvdm.lvdm_args_text2video import WebVidDatasetArguments, TrainerArguments, ModelArguments

4 changes: 0 additions & 4 deletions ppdiffusers/ppdiffusers/models/lvdm_attention_temporal.py
@@ -9,7 +9,6 @@
_ppxformers_available = False

import math
-from typing import Optional, Any
from einops import rearrange, repeat
from .lvdm_util import GEGLU, exists, default, Normalize, zero_module, normalization, conv_nd
from ..utils.initializer_utils import xavier_uniform_, constant_
@@ -189,9 +188,6 @@ def forward(self, x, context=None, mask=None):


class MemoryEfficientCrossAttention(paddle.nn.Layer):
"""https://github.com/MatthieuTPHR/diffusers/blob/d80b531ff8060ec1ea982b65a1b8df70f73aa67c/src/diffusers/models/attention.py#L223
"""

def __init__(self,
query_dim,
context_dim=None,
5 changes: 1 addition & 4 deletions ppdiffusers/ppdiffusers/models/lvdm_unet_3d.py
@@ -2,18 +2,15 @@
from paddle.distributed.fleet.utils import recompute

from abc import abstractmethod
-import math
from einops import rearrange
from functools import partial
-import numpy as np

from .lvdm_util import conv_nd, linear, avg_pool_nd, zero_module, normalization, timestep_embedding, nonlinearity
from .lvdm_attention_temporal import STAttentionBlock
from .lvdm_attention_temporal import SpatialTemporalTransformer

from dataclasses import dataclass
from ..configuration_utils import ConfigMixin, register_to_config
-from ..utils import BaseOutput, apply_forward_hook
+from ..utils import BaseOutput
from .modeling_utils import ModelMixin


4 changes: 1 addition & 3 deletions ppdiffusers/ppdiffusers/models/lvdm_vae.py
@@ -1,11 +1,9 @@
-import nntplib
import paddle
from .lvdm_distributions import DiagonalGaussianDistribution
from .lvdm_aemodules3d import SamePadConv3d, Encoder, Decoder

from dataclasses import dataclass
from ..configuration_utils import ConfigMixin, register_to_config
-from ..utils import BaseOutput, apply_forward_hook
+from ..utils import BaseOutput
from .modeling_utils import ModelMixin
from .vae import DecoderOutput

2 changes: 1 addition & 1 deletion ppdiffusers/ppdiffusers/pipelines/lvdm/__init__.py
@@ -14,7 +14,7 @@
# limitations under the License.

from dataclasses import dataclass
-from typing import List, Optional, Union
+from typing import List, Union

import numpy as np
import paddle
@@ -15,14 +15,12 @@
import inspect
from typing import Any, Callable, Dict, List, Optional, Union
-import os
-from tqdm import trange
import numpy as np
from einops import rearrange

import paddle
from packaging import version

-from paddlenlp.transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
+from paddlenlp.transformers import CLIPTextModel, CLIPTokenizer

from ...configuration_utils import FrozenDict
from ...models import LVDMAutoencoderKL, LVDMUNet3DModel
30 changes: 2 additions & 28 deletions ppdiffusers/ppdiffusers/pipelines/lvdm/video_save.py
@@ -8,8 +8,8 @@
import accimage
except ImportError:
accimage = None
-from PIL import Image, ImageEnhance, ImageOps
-from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple, Union
+from PIL import Image
+from typing import Any, Dict, List, Optional, Tuple, Union
import math
try:
import av
@@ -53,8 +53,6 @@ def write_video(filename: str,
audio_codec (str): the name of the audio codec, i.e. "mp3", "aac", etc.
audio_options (Dict): dictionary containing options to be passed into the PyAV audio stream
"""
-# >>> if not torch.jit.is_scripting() and not torch.jit.is_tracing():
-# _log_api_usage_once(write_video)
_check_av_available()
video_array = paddle.to_tensor(data=video_array).astype('uint8').numpy()
if isinstance(fps, float):
@@ -131,8 +129,6 @@ def make_grid(tensor: Union[paddle.Tensor, List[paddle.Tensor]],
Returns:
grid (Tensor): the tensor containing grid of images.
"""
-# >>> if not torch.jit.is_scripting() and not torch.jit.is_tracing():
-# _log_api_usage_once(make_grid)
if not paddle.is_tensor(x=tensor):
if isinstance(tensor, list):
for t in tensor:
@@ -162,9 +158,6 @@ def make_grid(tensor: Union[paddle.Tensor, List[paddle.Tensor]],

def norm_ip(img, low, high):
img.clip_(min=low, max=high)
# """Class Method: *.sub_, not convert, please check whether it is torch.Tensor.*/Optimizer.*/nn.Module.*, and convert manually"""
# """Class Method: *.div_, not convert, please check whether it is torch.Tensor.*/Optimizer.*/nn.Module.*, and convert manually"""
# >>> img.sub_(low).div_(max(high - low, 1e-05))
img = img.subtract(low).divide(max(high - low, 1e-05))

def norm_range(t, value_range):
@@ -241,40 +234,25 @@ def to_tensor(pic) -> paddle.Tensor:
Returns:
Tensor: Converted image.
"""
-# >>> if not torch.jit.is_scripting() and not torch.jit.is_tracing():
-# _log_api_usage_once(to_tensor)
-# if not (F_pil._is_pil_image(pic) or _is_numpy(pic)):
-# raise TypeError(f'pic should be PIL Image or ndarray. Got {type(pic)}')
-# if _is_numpy(pic) and not _is_numpy_image(pic):
-# raise ValueError(
-# f'pic should be 2/3 dimensional. Got {pic.ndim} dimensions.')
default_float_dtype = paddle.get_default_dtype()
if isinstance(pic, np.ndarray):
if pic.ndim == 2:
pic = pic[:, :, (None)]
img = paddle.to_tensor(data=pic.transpose((2, 0, 1)))
-# >>> if isinstance(img, torch.ByteTensor):
if img.dtype == paddle.uint8:
return paddle.divide(
img.cast(default_float_dtype),
paddle.to_tensor(
255, dtype=paddle.float32))
else:
return img
-# if accimage is not None and isinstance(pic, accimage.Image):
-# nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.
-# float32)
-# pic.copyto(nppic)
-# return paddle.to_tensor(data=nppic).cast(default_float_dtype)
mode_to_nptype = {'I': np.int32, 'I;16': np.int16, 'F': np.float32}
img = paddle.to_tensor(data=np.array(
pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))
if pic.mode == '1':
img = 255 * img
-# img = img.reshape([pic.size[1], pic.size[0], F_pil.get_image_num_channels(pic)])
img = img.reshape([pic.size[1], pic.size[0], get_image_num_channels(pic)])
img = img.transpose(perm=(2, 0, 1))
-# >>> if isinstance(img, torch.ByteTensor):
if img.dtype == paddle.uint8:
return paddle.divide(img.cast(default_float_dtype), 255)
else:
@@ -341,15 +319,13 @@ def npz_to_video_grid(data_path,
if nrow is None:
nrow = int(np.ceil(np.sqrt(n)))
if verbose:
-# >>> frame_grids = [torchvision.utils.make_grid(fs, nrow=nrow) for fs in tqdm(frame_grids, desc='Making grids')]
frame_grids = [
make_grid(
fs, nrow=nrow) for fs in tqdm(
frame_grids, desc='Making grids')
]

else:
-# >>> frame_grids = [torchvision.utils.make_grid(fs, nrow=nrow) for fs in frame_grids]
frame_grids = [make_grid(fs, nrow=nrow) for fs in frame_grids]

if os.path.dirname(out_path) != '':
@@ -365,8 +341,6 @@
dtype = (paddle.stack(x=frame_grids) * 255).dtype
frame_grids = (paddle.stack(x=frame_grids) * 255).transpose(
perm=[0, 2, 3, 1]).cast(dtype)
-# >>> torchvision.io.write_video(out_path, frame_grids, fps=fps, video_codec=
-# 'h264', options={'crf': '10'})
write_video(
out_path,
frame_grids,
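The write_video call above is truncated in this view. For reference, a hedged usage sketch of the helper this file ports; the module path is assumed from the repository layout, PyAV must be installed, and the 16x64x64 frame stack is an arbitrary example:

import numpy as np
import paddle

from ppdiffusers.pipelines.lvdm.video_save import write_video  # path assumed

# write_video mirrors torchvision.io.write_video: it expects a uint8 array
# or tensor shaped [T, H, W, C] and encodes it frame by frame with PyAV.
frames = paddle.to_tensor(
    np.random.randint(0, 256, size=(16, 64, 64, 3), dtype=np.uint8))
write_video('demo.mp4', frames, fps=8, video_codec='h264',
            options={'crf': '10'})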
