diff --git a/ppgan/apps/styleganv2fitting_predictor.py b/ppgan/apps/styleganv2fitting_predictor.py index 5c680d0bf40360..930a94a610e072 100644 --- a/ppgan/apps/styleganv2fitting_predictor.py +++ b/ppgan/apps/styleganv2fitting_predictor.py @@ -38,14 +38,12 @@ def make_image(tensor): class StyleGANv2FittingPredictor(StyleGANv2Predictor): - def run( - self, + def run(self, image, need_align=False, start_lr=0.1, final_lr=0.025, - latent_level=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11], # for ffhq (0~17) + latent_level=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], step=100, mse_weight=1, pre_latent=None): diff --git a/ppgan/datasets/animeganv2_dataset.py b/ppgan/datasets/animeganv2_dataset.py index 66a58ff7778c5a..7f70ef8bfdd18c 100644 --- a/ppgan/datasets/animeganv2_dataset.py +++ b/ppgan/datasets/animeganv2_dataset.py @@ -36,9 +36,9 @@ def __init__(self, """Initialize this dataset class. Args: - cfg (dict) -- stores all the experiment flags + dataroot (dict): Directory of dataset. + """ - # self.cfg = cfg self.root = dataroot self.style = style diff --git a/ppgan/datasets/firstorder_dataset.py b/ppgan/datasets/firstorder_dataset.py index f41733a04d9df8..5097273ce607fc 100755 --- a/ppgan/datasets/firstorder_dataset.py +++ b/ppgan/datasets/firstorder_dataset.py @@ -239,8 +239,7 @@ def __getitem__(self, idx): out['driving'] = out['source'] out['source'] = buf else: - video = np.stack(video_array, axis=0).astype( - np.float32) / 255.0 + video = np.stack(video_array, axis=0).astype(np.float32) / 255.0 out['video'] = video.transpose(3, 0, 1, 2) out['name'] = video_name return out diff --git a/ppgan/datasets/mpr_dataset.py b/ppgan/datasets/mpr_dataset.py index 8c243cbd4d4a2a..fad866dabeeecd 100644 --- a/ppgan/datasets/mpr_dataset.py +++ b/ppgan/datasets/mpr_dataset.py @@ -1,16 +1,6 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# code was heavily based on https://github.com/swz30/MPRNet +# Users should be careful about adopting these functions in any commercial matters. +# https://github.com/swz30/MPRNet/blob/main/LICENSE.md import os import random diff --git a/ppgan/datasets/starganv2_dataset.py b/ppgan/datasets/starganv2_dataset.py index cd1621e07cef52..0985b13c62362e 100755 --- a/ppgan/datasets/starganv2_dataset.py +++ b/ppgan/datasets/starganv2_dataset.py @@ -1,3 +1,6 @@ +# code was heavily based on https://github.com/clovaai/stargan-v2 +# Users should be careful about adopting these functions in any commercial matters. +# https://github.com/clovaai/stargan-v2#license import paddle from .base_dataset import BaseDataset @@ -14,8 +17,11 @@ def listdir(dname): - fnames = list(chain(*[list(Path(dname).rglob('*.' + ext)) - for ext in ['png', 'jpg', 'jpeg', 'JPG']])) + fnames = list( + chain(*[ + list(Path(dname).rglob('*.' 
+ ext)) + for ext in ['png', 'jpg', 'jpeg', 'JPG'] + ])) return fnames @@ -97,7 +103,6 @@ def __len__(self): return len(self.targets) - @DATASETS.register() class StarGANv2Dataset(BaseDataset): """ @@ -120,15 +125,16 @@ def __init__(self, dataroot, is_train, preprocess, test_count=0): else: files = os.listdir(self.dataroot) if 'src' in files and 'ref' in files: - self.src_loader = ImageFolder(os.path.join(self.dataroot, 'src')) - self.ref_loader = ImageFolder(os.path.join(self.dataroot, 'ref')) + self.src_loader = ImageFolder(os.path.join( + self.dataroot, 'src')) + self.ref_loader = ImageFolder(os.path.join( + self.dataroot, 'ref')) else: self.src_loader = ImageFolder(self.dataroot) self.ref_loader = ImageFolder(self.dataroot) self.counts = min(test_count, len(self.src_loader)) self.counts = min(self.counts, len(self.ref_loader)) - def _fetch_inputs(self): try: x, y = next(self.iter_src) @@ -136,7 +142,7 @@ def _fetch_inputs(self): self.iter_src = iter(self.src_loader) x, y = next(self.iter_src) return x, y - + def _fetch_refs(self): try: x, x2, y = next(self.iter_ref) @@ -165,7 +171,7 @@ def __getitem__(self, idx): 'ref_path': x_ref, 'ref_cls': y_ref, } - + if hasattr(self, 'preprocess') and self.preprocess: datas = self.preprocess(datas) @@ -173,6 +179,6 @@ def __getitem__(self, idx): def __len__(self): return self.counts - + def prepare_data_infos(self, dataroot): pass diff --git a/ppgan/models/animeganv2_model.py b/ppgan/models/animeganv2_model.py index 7bceb36c70d0ef..c2ee5de2e6bbec 100644 --- a/ppgan/models/animeganv2_model.py +++ b/ppgan/models/animeganv2_model.py @@ -27,6 +27,8 @@ @MODELS.register() class AnimeGANV2Model(BaseModel): + """ This class implements the AnimeGANV2 model. + """ def __init__(self, generator, discriminator=None, @@ -40,8 +42,10 @@ def __init__(self, tv_weight=1.): """Initialize the AnimeGANV2 class. - Parameters: - opt (config dict)-- stores all the experiment flags; needs to be a subclass of Dict + Args: + generator (dict): config of generator. + discriminator (dict): config of discriminator. + gan_criterion (dict): config of gan criterion. """ super(AnimeGANV2Model, self).__init__() self.g_adv_weight = g_adv_weight @@ -54,7 +58,7 @@ def __init__(self, self.nets['netG'] = build_generator(generator) init_weights(self.nets['netG']) - # define a discriminator; conditional GANs need to take both input and output images; Therefore, #channels for D is input_nc + output_nc + # define a discriminator if self.is_train: self.nets['netD'] = build_discriminator(discriminator) init_weights(self.nets['netD']) @@ -87,14 +91,14 @@ def setup_input(self, input): def forward(self): """Run forward pass; called by both functions and .""" - self.fake = self.nets['netG'](self.real) # G(A) + self.fake = self.nets['netG'](self.real) # put items to visual dict self.visual_items['real'] = self.real self.visual_items['fake'] = self.fake def test(self): - self.fake = self.nets['netG'](self.real) # G(A) + self.fake = self.nets['netG'](self.real) # put items to visual dict self.visual_items['real'] = self.real diff --git a/ppgan/models/discriminators/dcdiscriminator.py b/ppgan/models/discriminators/dcdiscriminator.py index f66b49a84ebca5..cd964cf91564ad 100644 --- a/ppgan/models/discriminators/dcdiscriminator.py +++ b/ppgan/models/discriminators/dcdiscriminator.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# code was heavily based on https://github.com/aidotse/Team-Haste +# MIT License +# Copyright (c) 2020 AI Sweden + import paddle import functools import numpy as np @@ -31,15 +35,14 @@ def __init__(self, input_nc, ndf=64, norm_type='instance'): """Construct a DCGAN discriminator Parameters: - input_nc (int) -- the number of channels in input images - ndf (int) -- the number of filters in the last conv layer - norm_type (str) -- normalization layer type + input_nc (int): the number of channels in input images + ndf (int): the number of filters in the last conv layer + norm_type (str): normalization layer type """ super(DCDiscriminator, self).__init__() norm_layer = build_norm_layer(norm_type) - if type( - norm_layer - ) == functools.partial: # no need to use bias as BatchNorm2d has affine parameters + if type(norm_layer) == functools.partial: + # no need to use bias as BatchNorm2d has affine parameters use_bias = norm_layer.func == nn.BatchNorm2D else: use_bias = norm_layer == nn.BatchNorm2D @@ -48,29 +51,30 @@ def __init__(self, input_nc, ndf=64, norm_type='instance'): padw = 1 sequence = [ - nn.Conv2D(input_nc, - ndf, - kernel_size=kw, - stride=2, - padding=padw, - bias_attr=use_bias), - nn.LeakyReLU(0.2) - ] + nn.Conv2D(input_nc, + ndf, + kernel_size=kw, + stride=2, + padding=padw, + bias_attr=use_bias), + nn.LeakyReLU(0.2) + ] nf_mult = 1 nf_mult_prev = 1 n_downsampling = 4 - for n in range(1, n_downsampling): # gradually increase the number of filters + # gradually increase the number of filters + for n in range(1, n_downsampling): nf_mult_prev = nf_mult nf_mult = min(2**n, 8) if norm_type == 'batch': sequence += [ nn.Conv2D(ndf * nf_mult_prev, - ndf * nf_mult, - kernel_size=kw, - stride=2, - padding=padw), + ndf * nf_mult, + kernel_size=kw, + stride=2, + padding=padw), BatchNorm2D(ndf * nf_mult), nn.LeakyReLU(0.2) ] @@ -88,13 +92,14 @@ def __init__(self, input_nc, ndf=64, norm_type='instance'): nf_mult_prev = nf_mult + # output 1 channel prediction map sequence += [ - nn.Conv2D(ndf * nf_mult_prev, - 1, - kernel_size=kw, - stride=1, - padding=0) - ] # output 1 channel prediction map + nn.Conv2D(ndf * nf_mult_prev, + 1, + kernel_size=kw, + stride=1, + padding=0) + ] self.model = nn.Sequential(*sequence) diff --git a/ppgan/models/discriminators/discriminator_animegan.py b/ppgan/models/discriminators/discriminator_animegan.py index d0c7badea1dabe..c06ad72f7d8aa3 100644 --- a/ppgan/models/discriminators/discriminator_animegan.py +++ b/ppgan/models/discriminators/discriminator_animegan.py @@ -1,16 +1,6 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# code was heavily based on https://github.com/TachibanaYoshino/AnimeGANv2 +# Users should be careful about adopting these functions in any commercial matters. 
+# https://github.com/TachibanaYoshino/AnimeGANv2#license import paddle.nn as nn import paddle.nn.functional as F diff --git a/ppgan/models/discriminators/discriminator_starganv2.py b/ppgan/models/discriminators/discriminator_starganv2.py index a2ff50eb0fc2a1..4525d4b95d93fa 100644 --- a/ppgan/models/discriminators/discriminator_starganv2.py +++ b/ppgan/models/discriminators/discriminator_starganv2.py @@ -1,3 +1,6 @@ +# code was heavily based on https://github.com/clovaai/stargan-v2 +# Users should be careful about adopting these functions in any commercial matters. +# https://github.com/clovaai/stargan-v2#license import paddle.nn as nn import paddle @@ -18,7 +21,7 @@ def __init__(self, img_size=256, num_domains=2, max_conv_dim=512): repeat_num = int(np.log2(img_size)) - 2 for _ in range(repeat_num): - dim_out = min(dim_in*2, max_conv_dim) + dim_out = min(dim_in * 2, max_conv_dim) blocks += [ResBlk(dim_in, dim_out, downsample=True)] dim_in = dim_out diff --git a/ppgan/models/discriminators/discriminator_styleganv2.py b/ppgan/models/discriminators/discriminator_styleganv2.py index 038d39ab5f2437..8acea70ede149f 100644 --- a/ppgan/models/discriminators/discriminator_styleganv2.py +++ b/ppgan/models/discriminators/discriminator_styleganv2.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +# code was heavily based on https://github.com/rosinality/stylegan2-pytorch +# MIT License +# Copyright (c) 2019 Kim Seonghyeon + import math import paddle import paddle.nn as nn diff --git a/ppgan/models/firstorder_model.py b/ppgan/models/firstorder_model.py index a8a59f659cc4c2..d1d4e81600d356 100755 --- a/ppgan/models/firstorder_model.py +++ b/ppgan/models/firstorder_model.py @@ -13,6 +13,8 @@ # limitations under the License. # code was heavily based on https://github.com/AliaksandrSiarohin/first-order-model +# Users should be careful about adopting these functions in any commercial matters. +# https://github.com/AliaksandrSiarohin/first-order-model/blob/master/LICENSE.md import paddle diff --git a/ppgan/models/generators/dcgenerator.py b/ppgan/models/generators/dcgenerator.py index 5bbdbb33e84358..abbc633b9cbbad 100644 --- a/ppgan/models/generators/dcgenerator.py +++ b/ppgan/models/generators/dcgenerator.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +# code was heavily based on https://github.com/aidotse/Team-Haste +# MIT License +# Copyright (c) 2020 AI Sweden + import paddle import paddle.nn as nn import functools @@ -26,7 +30,6 @@ class DCGenerator(nn.Layer): """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations. 
- code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style) """ def __init__(self, input_nz, @@ -38,12 +41,12 @@ def __init__(self, """Construct a DCGenerator generator Args: - input_nz (int) -- the number of dimension in input noise - input_nc (int) -- the number of channels in input images - output_nc (int) -- the number of channels in output images - ngf (int) -- the number of filters in the last conv layer - norm_layer -- normalization layer - padding_type (str) -- the name of padding layer in conv layers: reflect | replicate | zero + input_nz (int): the number of dimension in input noise + input_nc (int): the number of channels in input images + output_nc (int): the number of channels in output images + ngf (int): the number of filters in the last conv layer + norm_layer: normalization layer + padding_type (str): the name of padding layer in conv layers: reflect | replicate | zero """ super(DCGenerator, self).__init__() @@ -59,65 +62,66 @@ def __init__(self, if norm_type == 'batch': model = [ nn.Conv2DTranspose(input_nz, - ngf * mult, - kernel_size=4, - stride=1, - padding=0, - bias_attr=use_bias), + ngf * mult, + kernel_size=4, + stride=1, + padding=0, + bias_attr=use_bias), BatchNorm2D(ngf * mult), nn.ReLU() ] else: model = [ nn.Conv2DTranspose(input_nz, - ngf * mult, - kernel_size=4, - stride=1, - padding=0, - bias_attr=use_bias), + ngf * mult, + kernel_size=4, + stride=1, + padding=0, + bias_attr=use_bias), norm_layer(ngf * mult), nn.ReLU() ] - for i in range(1,n_downsampling): # add upsampling layers + # add upsampling layers + for i in range(1, n_downsampling): mult = 2**(n_downsampling - i) - output_size = 2**(i+2) + output_size = 2**(i + 2) if norm_type == 'batch': model += [ - nn.Conv2DTranspose(ngf * mult, - ngf * mult//2, - kernel_size=4, - stride=2, - padding=1, - bias_attr=use_bias), - BatchNorm2D(ngf * mult//2), - nn.ReLU() - ] + nn.Conv2DTranspose(ngf * mult, + ngf * mult // 2, + kernel_size=4, + stride=2, + padding=1, + bias_attr=use_bias), + BatchNorm2D(ngf * mult // 2), + nn.ReLU() + ] else: model += [ nn.Conv2DTranspose(ngf * mult, - int(ngf * mult//2), - kernel_size=4, - stride=2, - padding=1, - bias_attr=use_bias), + int(ngf * mult // 2), + kernel_size=4, + stride=2, + padding=1, + bias_attr=use_bias), norm_layer(int(ngf * mult // 2)), nn.ReLU() ] output_size = 2**(6) model += [ - nn.Conv2DTranspose(ngf , - output_nc, - kernel_size=4, - stride=2, - padding=1, - bias_attr=use_bias), - nn.Tanh() - ] + nn.Conv2DTranspose(ngf, + output_nc, + kernel_size=4, + stride=2, + padding=1, + bias_attr=use_bias), + nn.Tanh() + ] self.model = nn.Sequential(*model) def forward(self, x): """Standard forward""" - return self.model(x) \ No newline at end of file + return self.model(x) diff --git a/ppgan/models/generators/deep_conv.py b/ppgan/models/generators/deep_conv.py index 9712c9f6b1c505..0e757cbecbe8c7 100644 --- a/ppgan/models/generators/deep_conv.py +++ b/ppgan/models/generators/deep_conv.py @@ -21,32 +21,33 @@ @GENERATORS.register() class DeepConvGenerator(nn.Layer): - """Create a Deep Convolutional generator""" + """Create a Deep Convolutional generator + Refer to https://arxiv.org/abs/1511.06434 + """ def __init__(self, latent_dim, output_nc, size=64, ngf=64): """Construct a Deep Convolutional generator Args: - latent_dim (int) -- the number of latent dimension - output_nc (int) -- the number of channels in output images - size (int) -- size of output tensor - ngf (int) -- the number of filters in the last conv layer 
- - Refer to https://arxiv.org/abs/1511.06434 + latent_dim (int): the number of latent dimension + output_nc (int): the number of channels in output images + size (int): size of output tensor + ngf (int): the number of filters in the last conv layer """ super(DeepConvGenerator, self).__init__() self.latent_dim = latent_dim self.ngf = ngf self.init_size = size // 4 - self.l1 = nn.Sequential(nn.Linear(latent_dim, ngf*2 * self.init_size ** 2)) + self.l1 = nn.Sequential( + nn.Linear(latent_dim, ngf * 2 * self.init_size**2)) self.conv_blocks = nn.Sequential( - nn.BatchNorm2D(ngf*2), + nn.BatchNorm2D(ngf * 2), nn.Upsample(scale_factor=2), - nn.Conv2D(ngf*2, ngf*2, 3, stride=1, padding=1), - nn.BatchNorm2D(ngf*2, 0.2), + nn.Conv2D(ngf * 2, ngf * 2, 3, stride=1, padding=1), + nn.BatchNorm2D(ngf * 2, 0.2), nn.LeakyReLU(0.2), nn.Upsample(scale_factor=2), - nn.Conv2D(ngf*2, ngf, 3, stride=1, padding=1), + nn.Conv2D(ngf * 2, ngf, 3, stride=1, padding=1), nn.BatchNorm2D(ngf, 0.2), nn.LeakyReLU(0.2), nn.Conv2D(ngf, output_nc, 3, stride=1, padding=1), @@ -55,24 +56,36 @@ def __init__(self, latent_dim, output_nc, size=64, ngf=64): def random_inputs(self, batch_size): return paddle.randn([batch_size, self.latent_dim]) - + def forward(self, z): out = self.l1(z) - out = out.reshape([out.shape[0], self.ngf * 2, self.init_size, self.init_size]) + out = out.reshape( + [out.shape[0], self.ngf * 2, self.init_size, self.init_size]) img = self.conv_blocks(out) return img @GENERATORS.register() class ConditionalDeepConvGenerator(DeepConvGenerator): + """Create a Conditional Deep Convolutional generator + """ def __init__(self, latent_dim, output_nc, n_class=10, **kwargs): - super(ConditionalDeepConvGenerator, self).__init__(latent_dim + n_class, output_nc, **kwargs) + """Construct a Conditional Deep Convolutional generator + Args: + latent_dim (int): the number of latent dimension + output_nc (int): the number of channels in output images + n_class (int): the number of class + """ + super(ConditionalDeepConvGenerator, + self).__init__(latent_dim + n_class, output_nc, **kwargs) self.n_class = n_class self.latent_dim = latent_dim - + def random_inputs(self, batch_size): - return_list = [super(ConditionalDeepConvGenerator, self).random_inputs(batch_size)] + return_list = [ + super(ConditionalDeepConvGenerator, self).random_inputs(batch_size) + ] class_id = paddle.randint(0, self.n_class, [batch_size]) return return_list + [class_id] @@ -82,5 +95,5 @@ def forward(self, x, class_id=None): class_id = F.one_hot(class_id, self.n_class).astype('float32') class_id = class_id.reshape([x.shape[0], -1]) x = paddle.concat([x, class_id], 1) - + return super(ConditionalDeepConvGenerator, self).forward(x) diff --git a/ppgan/models/generators/generater_animegan.py b/ppgan/models/generators/generater_animegan.py index a2b09fe453e890..2d3f3aa037cc76 100644 --- a/ppgan/models/generators/generater_animegan.py +++ b/ppgan/models/generators/generater_animegan.py @@ -1,16 +1,6 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-#See the License for the specific language governing permissions and -#limitations under the License. +# code was heavily based on https://github.com/TachibanaYoshino/AnimeGANv2 +# Users should be careful about adopting these functions in any commercial matters. +# https://github.com/TachibanaYoshino/AnimeGANv2#license import paddle import paddle.nn as nn diff --git a/ppgan/models/generators/generator_pixel2style2pixel.py b/ppgan/models/generators/generator_pixel2style2pixel.py index 1651cc54c01b45..04f57ee82a04c6 100644 --- a/ppgan/models/generators/generator_pixel2style2pixel.py +++ b/ppgan/models/generators/generator_pixel2style2pixel.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +# code was heavily based on https://github.com/eladrich/pixel2style2pixel +# MIT License +# Copyright (c) 2020 Elad Richardson, Yuval Alaluf + import math import numpy as np import paddle @@ -41,7 +45,8 @@ class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): def get_block(in_channel, depth, num_units, stride=2): - return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)] + return [Bottleneck(in_channel, depth, stride) + ] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)] def get_blocks(num_layers): @@ -67,7 +72,9 @@ def get_blocks(num_layers): get_block(in_channel=256, depth=512, num_units=3) ] else: - raise ValueError("Invalid number of layers: {}. Must be one of [50, 100, 152]".format(num_layers)) + raise ValueError( + "Invalid number of layers: {}. Must be one of [50, 100, 152]". + format(num_layers)) return blocks @@ -75,9 +82,17 @@ class SEModule(nn.Layer): def __init__(self, channels, reduction): super(SEModule, self).__init__() self.avg_pool = nn.AdaptiveAvgPool2D(1) - self.fc1 = nn.Conv2D(channels, channels // reduction, kernel_size=1, padding=0, bias_attr=False) + self.fc1 = nn.Conv2D(channels, + channels // reduction, + kernel_size=1, + padding=0, + bias_attr=False) self.relu = nn.ReLU() - self.fc2 = nn.Conv2D(channels // reduction, channels, kernel_size=1, padding=0, bias_attr=False) + self.fc2 = nn.Conv2D(channels // reduction, + channels, + kernel_size=1, + padding=0, + bias_attr=False) self.sigmoid = nn.Sigmoid() def forward(self, x): @@ -98,13 +113,13 @@ def __init__(self, in_channel, depth, stride): else: self.shortcut_layer = nn.Sequential( nn.Conv2D(in_channel, depth, (1, 1), stride, bias_attr=False), - nn.BatchNorm2D(depth) - ) + nn.BatchNorm2D(depth)) self.res_layer = nn.Sequential( nn.BatchNorm2D(in_channel), - nn.Conv2D(in_channel, depth, (3, 3), (1, 1), 1, bias_attr=False), nn.PReLU(depth), - nn.Conv2D(depth, depth, (3, 3), stride, 1, bias_attr=False), nn.BatchNorm2D(depth) - ) + nn.Conv2D(in_channel, depth, (3, 3), (1, 1), 1, bias_attr=False), + nn.PReLU(depth), + nn.Conv2D(depth, depth, (3, 3), stride, 1, bias_attr=False), + nn.BatchNorm2D(depth)) def forward(self, x): shortcut = self.shortcut_layer(x) @@ -120,16 +135,13 @@ def __init__(self, in_channel, depth, stride): else: self.shortcut_layer = nn.Sequential( nn.Conv2D(in_channel, depth, (1, 1), stride, bias_attr=False), - nn.BatchNorm2D(depth) - ) + nn.BatchNorm2D(depth)) self.res_layer = nn.Sequential( nn.BatchNorm2D(in_channel), nn.Conv2D(in_channel, depth, (3, 3), (1, 1), 1, bias_attr=False), nn.PReLU(depth), nn.Conv2D(depth, depth, (3, 3), stride, 1, bias_attr=False), - nn.BatchNorm2D(depth), - SEModule(depth, 16) - ) + nn.BatchNorm2D(depth), SEModule(depth, 16)) def 
forward(self, x): shortcut = self.shortcut_layer(x) @@ -144,8 +156,10 @@ def __init__(self, in_c, out_c, spatial): self.spatial = spatial num_pools = int(np.log2(spatial)) modules = [] - modules += [nn.Conv2D(in_c, out_c, kernel_size=3, stride=2, padding=1), - nn.LeakyReLU()] + modules += [ + nn.Conv2D(in_c, out_c, kernel_size=3, stride=2, padding=1), + nn.LeakyReLU() + ] for i in range(num_pools - 1): modules += [ nn.Conv2D(out_c, out_c, kernel_size=3, stride=2, padding=1), @@ -164,22 +178,23 @@ def forward(self, x): class GradualStyleEncoder(nn.Layer): def __init__(self, num_layers, mode='ir', opts=None): super(GradualStyleEncoder, self).__init__() - assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152' + assert num_layers in [50, 100, + 152], 'num_layers should be 50,100, or 152' assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se' blocks = get_blocks(num_layers) if mode == 'ir': unit_module = BottleneckIR elif mode == 'ir_se': unit_module = BottleneckIRSE - self.input_layer = nn.Sequential(nn.Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False), - nn.BatchNorm2D(64), - nn.PReLU(64)) + self.input_layer = nn.Sequential( + nn.Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False), + nn.BatchNorm2D(64), nn.PReLU(64)) modules = [] for block in blocks: for bottleneck in block: - modules.append(unit_module(bottleneck.in_channel, - bottleneck.depth, - bottleneck.stride)) + modules.append( + unit_module(bottleneck.in_channel, bottleneck.depth, + bottleneck.stride)) self.body = nn.Sequential(*modules) self.styles = nn.LayerList() @@ -214,7 +229,8 @@ def _upsample_add(self, x, y): So we choose bilinear upsample which supports arbitrary output sizes. ''' _, _, H, W = y.shape - return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=True) + y + return F.interpolate( + x, size=(H, W), mode='bilinear', align_corners=True) + y def forward(self, x): x = self.input_layer(x) @@ -249,24 +265,25 @@ class BackboneEncoderUsingLastLayerIntoW(nn.Layer): def __init__(self, num_layers, mode='ir', opts=None): super(BackboneEncoderUsingLastLayerIntoW, self).__init__() print('Using BackboneEncoderUsingLastLayerIntoW') - assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152' + assert num_layers in [50, 100, + 152], 'num_layers should be 50,100, or 152' assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se' blocks = get_blocks(num_layers) if mode == 'ir': unit_module = BottleneckIR elif mode == 'ir_se': unit_module = BottleneckIRSE - self.input_layer = nn.Sequential(nn.Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False), - nn.BatchNorm2D(64), - nn.PReLU(64)) + self.input_layer = nn.Sequential( + nn.Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False), + nn.BatchNorm2D(64), nn.PReLU(64)) self.output_pool = nn.AdaptiveAvgPool2D((1, 1)) self.linear = EqualLinear(512, 512, lr_mul=1) modules = [] for block in blocks: for bottleneck in block: - modules.append(unit_module(bottleneck.in_channel, - bottleneck.depth, - bottleneck.stride)) + modules.append( + unit_module(bottleneck.in_channel, bottleneck.depth, + bottleneck.stride)) self.body = nn.Sequential(*modules) def forward(self, x): @@ -282,16 +299,17 @@ class BackboneEncoderUsingLastLayerIntoWPlus(nn.Layer): def __init__(self, num_layers, mode='ir', opts=None): super(BackboneEncoderUsingLastLayerIntoWPlus, self).__init__() print('Using BackboneEncoderUsingLastLayerIntoWPlus') - assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152' + assert num_layers in [50, 100, + 
152], 'num_layers should be 50,100, or 152' assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se' blocks = get_blocks(num_layers) if mode == 'ir': unit_module = BottleneckIR elif mode == 'ir_se': unit_module = BottleneckIRSE - self.input_layer = nn.Sequential(nn.Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False), - nn.BatchNorm2D(64), - nn.PReLU(64)) + self.input_layer = nn.Sequential( + nn.Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False), + nn.BatchNorm2D(64), nn.PReLU(64)) self.output_layer_2 = nn.Sequential(nn.BatchNorm2D(512), nn.AdaptiveAvgPool2D((7, 7)), Flatten(), @@ -300,9 +318,9 @@ def __init__(self, num_layers, mode='ir', opts=None): modules = [] for block in blocks: for bottleneck in block: - modules.append(unit_module(bottleneck.in_channel, - bottleneck.depth, - bottleneck.stride)) + modules.append( + unit_module(bottleneck.in_channel, bottleneck.depth, + bottleneck.stride)) self.body = nn.Sequential(*modules) def forward(self, x): @@ -321,15 +339,19 @@ def __init__(self, opts): self.set_opts(opts) # Define architecture self.encoder = self.set_encoder() - self.decoder = StyleGANv2Generator(opts.size, opts.style_dim, opts.n_mlp, opts.channel_multiplier) + self.decoder = StyleGANv2Generator(opts.size, opts.style_dim, + opts.n_mlp, opts.channel_multiplier) self.face_pool = nn.AdaptiveAvgPool2D((256, 256)) self.style_dim = self.decoder.style_dim self.n_latent = self.decoder.n_latent if self.opts.start_from_latent_avg: if self.opts.learn_in_w: - self.register_buffer('latent_avg', paddle.zeros([1, self.style_dim])) + self.register_buffer('latent_avg', + paddle.zeros([1, self.style_dim])) else: - self.register_buffer('latent_avg', paddle.zeros([1, self.n_latent, self.style_dim])) + self.register_buffer( + 'latent_avg', + paddle.zeros([1, self.n_latent, self.style_dim])) def set_encoder(self): if self.opts.encoder_type == 'GradualStyleEncoder': @@ -337,13 +359,22 @@ def set_encoder(self): elif self.opts.encoder_type == 'BackboneEncoderUsingLastLayerIntoW': encoder = BackboneEncoderUsingLastLayerIntoW(50, 'ir_se', self.opts) elif self.opts.encoder_type == 'BackboneEncoderUsingLastLayerIntoWPlus': - encoder = BackboneEncoderUsingLastLayerIntoWPlus(50, 'ir_se', self.opts) + encoder = BackboneEncoderUsingLastLayerIntoWPlus( + 50, 'ir_se', self.opts) else: - raise Exception('{} is not a valid encoders'.format(self.opts.encoder_type)) + raise Exception('{} is not a valid encoders'.format( + self.opts.encoder_type)) return encoder - def forward(self, x, resize=True, latent_mask=None, input_code=False, randomize_noise=True, - inject_latent=None, return_latents=False, alpha=None): + def forward(self, + x, + resize=True, + latent_mask=None, + input_code=False, + randomize_noise=True, + inject_latent=None, + return_latents=False, + alpha=None): if input_code: codes = x else: @@ -355,12 +386,12 @@ def forward(self, x, resize=True, latent_mask=None, input_code=False, randomize_ else: codes = codes + self.latent_avg.tile([codes.shape[0], 1, 1]) - if latent_mask is not None: for i in latent_mask: if inject_latent is not None: if alpha is not None: - codes[:, i] = alpha * inject_latent[:, i] + (1 - alpha) * codes[:, i] + codes[:, i] = alpha * inject_latent[:, i] + ( + 1 - alpha) * codes[:, i] else: codes[:, i] = inject_latent[:, i] else: diff --git a/ppgan/models/generators/generator_starganv2.py b/ppgan/models/generators/generator_starganv2.py index ad1aedbb42f12b..a8cef90cf88fcd 100755 --- a/ppgan/models/generators/generator_starganv2.py +++ 
b/ppgan/models/generators/generator_starganv2.py @@ -1,4 +1,6 @@ - +# code was heavily based on https://github.com/clovaai/stargan-v2 +# Users should be careful about adopting these functions in any commercial matters. +# https://github.com/clovaai/stargan-v2#license import paddle from paddle import nn import paddle.nn.functional as F @@ -13,25 +15,30 @@ FAN_WEIGHT_URL = "https://paddlegan.bj.bcebos.com/models/wing.pdparams" + class AvgPool2D(nn.Layer): """ - AvgPool2D + AvgPool2D Peplace avg_pool2d because paddle.grad will cause avg_pool2d to report an error when training. In the future Paddle framework will supports avg_pool2d and remove this class. """ def __init__(self): super(AvgPool2D, self).__init__() - self.filter = paddle.to_tensor([[1, 1], - [1, 1]], dtype='float32') + self.filter = paddle.to_tensor([[1, 1], [1, 1]], dtype='float32') def forward(self, x): - filter = self.filter.unsqueeze(0).unsqueeze(1).tile([x.shape[1], 1, 1, 1]) + filter = self.filter.unsqueeze(0).unsqueeze(1).tile( + [x.shape[1], 1, 1, 1]) return F.conv2d(x, filter, stride=2, padding=0, groups=x.shape[1]) / 4 class ResBlk(nn.Layer): - def __init__(self, dim_in, dim_out, actv=nn.LeakyReLU(0.2), - normalize=False, downsample=False): + def __init__(self, + dim_in, + dim_out, + actv=nn.LeakyReLU(0.2), + normalize=False, + downsample=False): super().__init__() self.actv = actv self.normalize = normalize @@ -43,8 +50,12 @@ def _build_weights(self, dim_in, dim_out): self.conv1 = nn.Conv2D(dim_in, dim_in, 3, 1, 1) self.conv2 = nn.Conv2D(dim_in, dim_out, 3, 1, 1) if self.normalize: - self.norm1 = nn.InstanceNorm2D(dim_in, weight_attr=True, bias_attr=True) - self.norm2 = nn.InstanceNorm2D(dim_in, weight_attr=True, bias_attr=True) + self.norm1 = nn.InstanceNorm2D(dim_in, + weight_attr=True, + bias_attr=True) + self.norm2 = nn.InstanceNorm2D(dim_in, + weight_attr=True, + bias_attr=True) if self.learned_sc: self.conv1x1 = nn.Conv2D(dim_in, dim_out, 1, 1, 0, bias_attr=False) @@ -76,8 +87,10 @@ def forward(self, x): class AdaIN(nn.Layer): def __init__(self, style_dim, num_features): super().__init__() - self.norm = nn.InstanceNorm2D(num_features, weight_attr=False, bias_attr=False) - self.fc = nn.Linear(style_dim, num_features*2) + self.norm = nn.InstanceNorm2D(num_features, + weight_attr=False, + bias_attr=False) + self.fc = nn.Linear(style_dim, num_features * 2) def forward(self, x, s): h = self.fc(s) @@ -88,8 +101,13 @@ def forward(self, x, s): class AdainResBlk(nn.Layer): - def __init__(self, dim_in, dim_out, style_dim=64, w_hpf=0, - actv=nn.LeakyReLU(0.2), upsample=False): + def __init__(self, + dim_in, + dim_out, + style_dim=64, + w_hpf=0, + actv=nn.LeakyReLU(0.2), + upsample=False): super().__init__() self.w_hpf = w_hpf self.actv = actv @@ -133,13 +151,13 @@ def forward(self, x, s): class HighPass(nn.Layer): def __init__(self, w_hpf): super(HighPass, self).__init__() - self.filter = paddle.to_tensor([[-1, -1, -1], - [-1, 8., -1], - [-1, -1, -1]]) / w_hpf + self.filter = paddle.to_tensor([[-1, -1, -1], [-1, 8., -1], + [-1, -1, -1]]) / w_hpf def forward(self, x): # filter = self.filter.unsqueeze(0).unsqueeze(1).repeat(x.size(1), 1, 1, 1) - filter = self.filter.unsqueeze(0).unsqueeze(1).tile([x.shape[1], 1, 1, 1]) + filter = self.filter.unsqueeze(0).unsqueeze(1).tile( + [x.shape[1], 1, 1, 1]) return F.conv2d(x, filter, padding=1, groups=x.shape[1]) @@ -154,30 +172,35 @@ def __init__(self, img_size=256, style_dim=64, max_conv_dim=512, w_hpf=1): self.decode = nn.LayerList() self.to_rgb = nn.Sequential( 
nn.InstanceNorm2D(dim_in, weight_attr=True, bias_attr=True), - nn.LeakyReLU(0.2), - nn.Conv2D(dim_in, 3, 1, 1, 0)) + nn.LeakyReLU(0.2), nn.Conv2D(dim_in, 3, 1, 1, 0)) # down/up-sampling blocks repeat_num = int(np.log2(img_size)) - 4 if w_hpf > 0: repeat_num += 1 for _ in range(repeat_num): - dim_out = min(dim_in*2, max_conv_dim) + dim_out = min(dim_in * 2, max_conv_dim) self.encode.append( ResBlk(dim_in, dim_out, normalize=True, downsample=True)) if len(self.decode) == 0: - self.decode.append(AdainResBlk(dim_out, dim_in, style_dim, - w_hpf=w_hpf, upsample=True)) + self.decode.append( + AdainResBlk(dim_out, + dim_in, + style_dim, + w_hpf=w_hpf, + upsample=True)) else: - self.decode.insert( - 0, AdainResBlk(dim_out, dim_in, style_dim, - w_hpf=w_hpf, upsample=True)) # stack-like + self.decode.insert(0, + AdainResBlk(dim_out, + dim_in, + style_dim, + w_hpf=w_hpf, + upsample=True)) # stack-like dim_in = dim_out # bottleneck blocks for _ in range(2): - self.encode.append( - ResBlk(dim_out, dim_out, normalize=True)) + self.encode.append(ResBlk(dim_out, dim_out, normalize=True)) self.decode.insert( 0, AdainResBlk(dim_out, dim_out, style_dim, w_hpf=w_hpf)) @@ -195,7 +218,9 @@ def forward(self, x, s, masks=None): x = block(x, s) if (masks is not None) and (x.shape[2] in [32, 64, 128]): mask = masks[0] if x.shape[2] in [32] else masks[1] - mask = F.interpolate(mask, size=[x.shape[2], x.shape[2]], mode='bilinear') + mask = F.interpolate(mask, + size=[x.shape[2], x.shape[2]], + mode='bilinear') x = x + self.hpf(mask * cache[x.shape[2]]) return self.to_rgb(x) @@ -214,13 +239,11 @@ def __init__(self, latent_dim=16, style_dim=64, num_domains=2): self.unshared = nn.LayerList() for _ in range(num_domains): - self.unshared.append(nn.Sequential(nn.Linear(512, 512), - nn.ReLU(), - nn.Linear(512, 512), - nn.ReLU(), - nn.Linear(512, 512), - nn.ReLU(), - nn.Linear(512, style_dim))) + self.unshared.append( + nn.Sequential(nn.Linear(512, 512), + nn.ReLU(), nn.Linear(512, 512), nn.ReLU(), + nn.Linear(512, 512), nn.ReLU(), + nn.Linear(512, style_dim))) def forward(self, z, y): h = self.shared(z) @@ -231,7 +254,10 @@ def forward(self, z, y): idx = paddle.to_tensor(np.array(range(y.shape[0]))).astype('int') s = [] for i in range(idx.shape[0]): - s += [out[idx[i].numpy().astype(np.int).tolist()[0], y[i].numpy().astype(np.int).tolist()[0]]] + s += [ + out[idx[i].numpy().astype(np.int).tolist()[0], + y[i].numpy().astype(np.int).tolist()[0]] + ] s = paddle.stack(s) s = paddle.reshape(s, (s.shape[0], -1)) return s @@ -239,7 +265,11 @@ def forward(self, z, y): @GENERATORS.register() class StarGANv2Style(nn.Layer): - def __init__(self, img_size=256, style_dim=64, num_domains=2, max_conv_dim=512): + def __init__(self, + img_size=256, + style_dim=64, + num_domains=2, + max_conv_dim=512): super().__init__() dim_in = 2**14 // img_size blocks = [] @@ -247,7 +277,7 @@ def __init__(self, img_size=256, style_dim=64, num_domains=2, max_conv_dim=512): repeat_num = int(np.log2(img_size)) - 2 for _ in range(repeat_num): - dim_out = min(dim_in*2, max_conv_dim) + dim_out = min(dim_in * 2, max_conv_dim) blocks += [ResBlk(dim_in, dim_out, downsample=True)] dim_in = dim_out @@ -270,7 +300,10 @@ def forward(self, x, y): idx = paddle.to_tensor(np.array(range(y.shape[0]))).astype('int') s = [] for i in range(idx.shape[0]): - s += [out[idx[i].numpy().astype(np.int).tolist()[0], y[i].numpy().astype(np.int).tolist()[0]]] + s += [ + out[idx[i].numpy().astype(np.int).tolist()[0], + y[i].numpy().astype(np.int).tolist()[0]] + ] s = paddle.stack(s) s = 
paddle.reshape(s, (s.shape[0], -1)) return s @@ -278,15 +311,25 @@ def forward(self, x, y): @GENERATORS.register() class FAN(nn.Layer): - def __init__(self, num_modules=1, end_relu=False, num_landmarks=98, fname_pretrained=None): + def __init__(self, + num_modules=1, + end_relu=False, + num_landmarks=98, + fname_pretrained=None): super(FAN, self).__init__() self.num_modules = num_modules self.end_relu = end_relu # Base part - self.conv1 = CoordConvTh(256, 256, True, False, - in_channels=3, out_channels=64, - kernel_size=7, stride=2, padding=3) + self.conv1 = CoordConvTh(256, + 256, + True, + False, + in_channels=3, + out_channels=64, + kernel_size=7, + stride=2, + padding=3) self.bn1 = nn.BatchNorm2D(64) self.conv2 = ConvBlock(64, 128) self.conv3 = ConvBlock(128, 128) @@ -297,7 +340,7 @@ def __init__(self, num_modules=1, end_relu=False, num_landmarks=98, fname_pretra self.add_sublayer('top_m_0', ConvBlock(256, 256)) self.add_sublayer('conv_last0', nn.Conv2D(256, 256, 1, 1, 0)) self.add_sublayer('bn_end0', nn.BatchNorm2D(256)) - self.add_sublayer('l0', nn.Conv2D(256, num_landmarks+1, 1, 1, 0)) + self.add_sublayer('l0', nn.Conv2D(256, num_landmarks + 1, 1, 1, 0)) if fname_pretrained is not None: self.load_pretrained_weights(fname_pretrained) @@ -312,10 +355,12 @@ def load_pretrained_weights(self, fname): with open(fname, 'rb') as f: checkpoint = pickle.load(f) if six.PY2 else pickle.load( f, encoding='latin1') - + model_weights = self.state_dict() - model_weights.update({k: v for k, v in checkpoint['state_dict'].items() - if k in model_weights}) + model_weights.update({ + k: v + for k, v in checkpoint['state_dict'].items() if k in model_weights + }) self.set_state_dict(model_weights) def forward(self, x): @@ -330,8 +375,9 @@ def forward(self, x): tmp_out = None ll, boundary_channel = self._sub_layers['m0'](x, tmp_out) ll = self._sub_layers['top_m_0'](ll) - ll = F.relu(self._sub_layers['bn_end0'] - (self._sub_layers['conv_last0'](ll)), True) + ll = F.relu( + self._sub_layers['bn_end0'](self._sub_layers['conv_last0'](ll)), + True) # Predict heatmaps tmp_out = self._sub_layers['l0'](ll) @@ -345,12 +391,14 @@ def forward(self, x): def get_heatmap(self, x, b_preprocess=True): ''' outputs 0-1 normalized heatmap ''' x = F.interpolate(x, size=[256, 256], mode='bilinear') - x_01 = x*0.5 + 0.5 + x_01 = x * 0.5 + 0.5 outputs, _ = self(x_01) heatmaps = outputs[-1][:, :-1, :, :] scale_factor = x.shape[2] // heatmaps.shape[2] if b_preprocess: - heatmaps = F.interpolate(heatmaps, scale_factor=scale_factor, - mode='bilinear', align_corners=True) + heatmaps = F.interpolate(heatmaps, + scale_factor=scale_factor, + mode='bilinear', + align_corners=True) heatmaps = preprocess(heatmaps) return heatmaps diff --git a/ppgan/models/generators/generator_styleganv2.py b/ppgan/models/generators/generator_styleganv2.py index 72a6c0a3366cf8..6297a3ea8f949d 100644 --- a/ppgan/models/generators/generator_styleganv2.py +++ b/ppgan/models/generators/generator_styleganv2.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# code was heavily based on https://github.com/rosinality/stylegan2-pytorch +# MIT License +# Copyright (c) 2019 Kim Seonghyeon + import math import random import paddle @@ -147,7 +151,7 @@ def forward(self, image, noise=None): if noise is None: batch, _, height, width = image.shape noise = paddle.randn((batch, 1, height, width)) - if self.is_concat: + if self.is_concat: return paddle.concat([image, self.weight * noise], axis=1) else: return image + self.weight * noise @@ -169,17 +173,15 @@ def forward(self, input): class StyledConv(nn.Layer): - def __init__( - self, - in_channel, - out_channel, - kernel_size, - style_dim, - upsample=False, - blur_kernel=[1, 3, 3, 1], - demodulate=True, - is_concat=False - ): + def __init__(self, + in_channel, + out_channel, + kernel_size, + style_dim, + upsample=False, + blur_kernel=[1, 3, 3, 1], + demodulate=True, + is_concat=False): super().__init__() self.conv = ModulatedConv2D( @@ -193,7 +195,8 @@ def __init__( ) self.noise = NoiseInjection(is_concat=is_concat) - self.activate = FusedLeakyReLU(out_channel*2 if is_concat else out_channel) + self.activate = FusedLeakyReLU(out_channel * + 2 if is_concat else out_channel) def forward(self, input, style, noise=None): out = self.conv(input, style) @@ -236,16 +239,14 @@ def forward(self, input, style, skip=None): @GENERATORS.register() class StyleGANv2Generator(nn.Layer): - def __init__( - self, - size, - style_dim, - n_mlp, - channel_multiplier=2, - blur_kernel=[1, 3, 3, 1], - lr_mlp=0.01, - is_concat=False - ): + def __init__(self, + size, + style_dim, + n_mlp, + channel_multiplier=2, + blur_kernel=[1, 3, 3, 1], + lr_mlp=0.01, + is_concat=False): super().__init__() self.size = size @@ -282,7 +283,10 @@ def __init__( style_dim, blur_kernel=blur_kernel, is_concat=is_concat) - self.to_rgb1 = ToRGB(self.channels[4]*2 if is_concat else self.channels[4], style_dim, upsample=False) + self.to_rgb1 = ToRGB(self.channels[4] * + 2 if is_concat else self.channels[4], + style_dim, + upsample=False) self.log_size = int(math.log(size, 2)) self.num_layers = (self.log_size - 2) * 2 + 1 @@ -305,7 +309,7 @@ def __init__( self.convs.append( StyledConv( - in_channel*2 if is_concat else in_channel, + in_channel * 2 if is_concat else in_channel, out_channel, 3, style_dim, @@ -315,14 +319,15 @@ def __init__( )) self.convs.append( - StyledConv(out_channel*2 if is_concat else out_channel, + StyledConv(out_channel * 2 if is_concat else out_channel, out_channel, 3, style_dim, blur_kernel=blur_kernel, is_concat=is_concat)) - self.to_rgbs.append(ToRGB(out_channel*2 if is_concat else out_channel, style_dim)) + self.to_rgbs.append( + ToRGB(out_channel * 2 if is_concat else out_channel, style_dim)) in_channel = out_channel @@ -408,20 +413,21 @@ def forward( noise_i = 1 outs = [] - for conv1, conv2, to_rgb in zip( - self.convs[::2], self.convs[1::2], self.to_rgbs): - out = conv1(out, latent[:, i], noise=noise[(noise_i + 1)//2]) ### 1 for 2 - out = conv2(out, latent[:, i + 1], noise=noise[(noise_i + 2)//2]) ### 1 for 2 + for conv1, conv2, to_rgb in zip(self.convs[::2], self.convs[1::2], + self.to_rgbs): + out = conv1(out, latent[:, i], + noise=noise[(noise_i + 1) // 2]) ### 1 for 2 + out = conv2(out, + latent[:, i + 1], + noise=noise[(noise_i + 2) // 2]) ### 1 for 2 skip = to_rgb(out, latent[:, i + 2], skip) - + i += 2 noise_i += 2 - else: - for conv1, conv2, noise1, noise2, to_rgb in zip(self.convs[::2], - self.convs[1::2], - noise[1::2], - noise[2::2], - self.to_rgbs): + else: + for conv1, conv2, noise1, noise2, to_rgb in zip( + 
self.convs[::2], self.convs[1::2], noise[1::2], noise[2::2], + self.to_rgbs): out = conv1(out, latent[:, i], noise=noise1) out = conv2(out, latent[:, i + 1], noise=noise2) skip = to_rgb(out, latent[:, i + 2], skip) diff --git a/ppgan/models/generators/resnet_ugatit_p2c.py b/ppgan/models/generators/resnet_ugatit_p2c.py index 865fd9ca5d8ea0..e7874c8bccfb97 100644 --- a/ppgan/models/generators/resnet_ugatit_p2c.py +++ b/ppgan/models/generators/resnet_ugatit_p2c.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +# code was heavily based on https://github.com/znxlwm/UGATIT-pytorch +# MIT License +# Copyright (c) 2019 Hyeonwoo Kang import paddle import paddle.nn as nn @@ -45,72 +48,82 @@ def __init__(self, nn.ReLU() ] - DownBlock += [ - HourGlass(ngf, ngf), - HourGlass(ngf, ngf) - ] + DownBlock += [HourGlass(ngf, ngf), HourGlass(ngf, ngf)] # Down-Sampling n_downsampling = 2 for i in range(n_downsampling): - mult = 2 ** i + mult = 2**i DownBlock += [ nn.Pad2D([1, 1, 1, 1], 'reflect'), - nn.Conv2D(ngf*mult, ngf*mult*2, kernel_size=3, stride=2, bias_attr=False), - nn.InstanceNorm2D(ngf*mult*2, weight_attr=False, bias_attr=False), + nn.Conv2D(ngf * mult, + ngf * mult * 2, + kernel_size=3, + stride=2, + bias_attr=False), + nn.InstanceNorm2D(ngf * mult * 2, + weight_attr=False, + bias_attr=False), nn.ReLU() ] # Encoder Bottleneck - mult = 2 ** n_downsampling + mult = 2**n_downsampling for i in range(n_blocks): - setattr(self, 'EncodeBlock'+str(i+1), ResnetBlock(ngf*mult)) + setattr(self, 'EncodeBlock' + str(i + 1), ResnetBlock(ngf * mult)) # Class Activation Map - self.gap_fc = nn.Linear(ngf*mult, 1, bias_attr=False) - self.gmp_fc = nn.Linear(ngf*mult, 1, bias_attr=False) - self.conv1x1 = nn.Conv2D(ngf*mult*2, ngf*mult, kernel_size=1, stride=1) + self.gap_fc = nn.Linear(ngf * mult, 1, bias_attr=False) + self.gmp_fc = nn.Linear(ngf * mult, 1, bias_attr=False) + self.conv1x1 = nn.Conv2D(ngf * mult * 2, + ngf * mult, + kernel_size=1, + stride=1) self.relu = nn.ReLU() # Gamma, Beta block FC = [] if self.light: FC += [ - nn.Linear(ngf*mult, ngf*mult, bias_attr=False), + nn.Linear(ngf * mult, ngf * mult, bias_attr=False), nn.ReLU(), - nn.Linear(ngf*mult, ngf*mult, bias_attr=False), + nn.Linear(ngf * mult, ngf * mult, bias_attr=False), nn.ReLU() ] else: FC += [ - nn.Linear(img_size//mult*img_size//mult*ngf*mult, ngf*mult, bias_attr=False), + nn.Linear(img_size // mult * img_size // mult * ngf * mult, + ngf * mult, + bias_attr=False), nn.ReLU(), - nn.Linear(ngf*mult, ngf*mult, bias_attr=False), + nn.Linear(ngf * mult, ngf * mult, bias_attr=False), nn.ReLU() ] # Decoder Bottleneck - mult = 2 ** n_downsampling + mult = 2**n_downsampling for i in range(n_blocks): - setattr(self, 'DecodeBlock'+str(i + 1), ResnetSoftAdaLINBlock(ngf*mult)) + setattr(self, 'DecodeBlock' + str(i + 1), + ResnetSoftAdaLINBlock(ngf * mult)) # Up-Sampling UpBlock = [] for i in range(n_downsampling): - mult = 2 ** (n_downsampling - i) + mult = 2**(n_downsampling - i) UpBlock += [ nn.Upsample(scale_factor=2), nn.Pad2D([1, 1, 1, 1], 'reflect'), - nn.Conv2D(ngf*mult, ngf*mult//2, kernel_size=3, stride=1, bias_attr=False), - LIN(ngf*mult//2), + nn.Conv2D(ngf * mult, + ngf * mult // 2, + kernel_size=3, + stride=1, + bias_attr=False), + LIN(ngf * mult // 2), nn.ReLU() ] - UpBlock += [ - HourGlass(ngf, ngf), - HourGlass(ngf, ngf, False) - ] + UpBlock += [HourGlass(ngf, ngf), HourGlass(ngf, ngf, False)] UpBlock += [ nn.Pad2D([3, 3, 3, 3], 'reflect'), @@ -129,8 +142,9 @@ def 
forward(self, x): content_features = [] for i in range(self.n_blocks): - x = getattr(self, 'EncodeBlock'+str(i+1))(x) - content_features.append(F.adaptive_avg_pool2d(x, 1).reshape([bs, -1])) + x = getattr(self, 'EncodeBlock' + str(i + 1))(x) + content_features.append( + F.adaptive_avg_pool2d(x, 1).reshape([bs, -1])) gap = F.adaptive_avg_pool2d(x, 1) gap_logit = self.gap_fc(gap.reshape([bs, -1])) @@ -155,7 +169,10 @@ def forward(self, x): style_features = self.FC(x.reshape([bs, -1])) for i in range(self.n_blocks): - x = getattr(self, 'DecodeBlock'+str(i+1))(x, content_features[4-i-1], style_features) + x = getattr(self, + 'DecodeBlock' + str(i + 1))(x, + content_features[4 - i - 1], + style_features) out = self.UpBlock(x) @@ -168,25 +185,27 @@ def __init__(self, dim_in, dim_out): self.dim_in = dim_in self.dim_out = dim_out - self.conv_block1 = self.__convblock(dim_in, dim_out//2) - self.conv_block2 = self.__convblock(dim_out//2, dim_out//4) - self.conv_block3 = self.__convblock(dim_out//4, dim_out//4) + self.conv_block1 = self.__convblock(dim_in, dim_out // 2) + self.conv_block2 = self.__convblock(dim_out // 2, dim_out // 4) + self.conv_block3 = self.__convblock(dim_out // 4, dim_out // 4) if self.dim_in != self.dim_out: self.conv_skip = nn.Sequential( nn.InstanceNorm2D(dim_in, weight_attr=False, bias_attr=False), nn.ReLU(), - nn.Conv2D(dim_in, dim_out, kernel_size=1, stride=1, bias_attr=False) - ) + nn.Conv2D(dim_in, + dim_out, + kernel_size=1, + stride=1, + bias_attr=False)) @staticmethod def __convblock(dim_in, dim_out): return nn.Sequential( nn.InstanceNorm2D(dim_in, weight_attr=False, bias_attr=False), - nn.ReLU(), - nn.Pad2D([1, 1, 1, 1], 'reflect'), - nn.Conv2D(dim_in, dim_out, kernel_size=3, stride=1, bias_attr=False) - ) + nn.ReLU(), nn.Pad2D([1, 1, 1, 1], 'reflect'), + nn.Conv2D(dim_in, dim_out, kernel_size=3, stride=1, + bias_attr=False)) def forward(self, x): residual = x @@ -210,24 +229,25 @@ def __init__(self, dim_in): self.n_block = 9 for i in range(self.n_skip): - setattr(self, 'ConvBlockskip'+str(i+1), ConvBlock(dim_in, dim_in)) + setattr(self, 'ConvBlockskip' + str(i + 1), + ConvBlock(dim_in, dim_in)) for i in range(self.n_block): - setattr(self, 'ConvBlock'+str(i+1), ConvBlock(dim_in, dim_in)) + setattr(self, 'ConvBlock' + str(i + 1), ConvBlock(dim_in, dim_in)) def forward(self, x): skips = [] for i in range(self.n_skip): - skips.append(getattr(self, 'ConvBlockskip'+str(i+1))(x)) + skips.append(getattr(self, 'ConvBlockskip' + str(i + 1))(x)) x = F.avg_pool2d(x, 2) - x = getattr(self, 'ConvBlock'+str(i+1))(x) + x = getattr(self, 'ConvBlock' + str(i + 1))(x) x = self.ConvBlock5(x) for i in range(self.n_skip): - x = getattr(self, 'ConvBlock'+str(i+6))(x) + x = getattr(self, 'ConvBlock' + str(i + 6))(x) x = F.upsample(x, scale_factor=2) - x = skips[self.n_skip-i-1] + x + x = skips[self.n_skip - i - 1] + x return x @@ -238,12 +258,14 @@ def __init__(self, dim_in, dim_out, use_res=True): self.use_res = use_res self.HG = nn.Sequential( - HourGlassBlock(dim_in), - ConvBlock(dim_out, dim_out), - nn.Conv2D(dim_out, dim_out, kernel_size=1, stride=1, bias_attr=False), + HourGlassBlock(dim_in), ConvBlock(dim_out, dim_out), + nn.Conv2D(dim_out, + dim_out, + kernel_size=1, + stride=1, + bias_attr=False), nn.InstanceNorm2D(dim_out, weight_attr=False, bias_attr=False), - nn.ReLU() - ) + nn.ReLU()) self.Conv1 = nn.Conv2D(dim_out, 3, kernel_size=1, stride=1) @@ -292,12 +314,20 @@ class ResnetSoftAdaLINBlock(nn.Layer): def __init__(self, dim, use_bias=False): super(ResnetSoftAdaLINBlock, 
self).__init__()
         self.pad1 = nn.Pad2D([1, 1, 1, 1], 'reflect')
-        self.conv1 = nn.Conv2D(dim, dim, kernel_size=3, stride=1, bias_attr=use_bias)
+        self.conv1 = nn.Conv2D(dim,
+                               dim,
+                               kernel_size=3,
+                               stride=1,
+                               bias_attr=use_bias)
         self.norm1 = SoftAdaLIN(dim)
         self.relu1 = nn.ReLU()
 
         self.pad2 = nn.Pad2D([1, 1, 1, 1], 'reflect')
-        self.conv2 = nn.Conv2D(dim, dim, kernel_size=3, stride=1, bias_attr=use_bias)
+        self.conv2 = nn.Conv2D(dim,
+                               dim,
+                               kernel_size=3,
+                               stride=1,
+                               bias_attr=use_bias)
         self.norm2 = SoftAdaLIN(dim)
 
     def forward(self, x, content_features, style_features):
@@ -317,23 +347,28 @@ def __init__(self, num_features, eps=1e-5):
         super(SoftAdaLIN, self).__init__()
         self.norm = AdaLIN(num_features, eps)
 
-        self.w_gamma = self.create_parameter([1, num_features], default_initializer=nn.initializer.Constant(0.))
-        self.w_beta = self.create_parameter([1, num_features], default_initializer=nn.initializer.Constant(0.))
-
-        self.c_gamma = nn.Sequential(nn.Linear(num_features, num_features, bias_attr=False),
-                                     nn.ReLU(),
-                                     nn.Linear(num_features, num_features, bias_attr=False))
-        self.c_beta = nn.Sequential(nn.Linear(num_features, num_features, bias_attr=False),
-                                    nn.ReLU(),
-                                    nn.Linear(num_features, num_features, bias_attr=False))
+        self.w_gamma = self.create_parameter(
+            [1, num_features], default_initializer=nn.initializer.Constant(0.))
+        self.w_beta = self.create_parameter(
+            [1, num_features], default_initializer=nn.initializer.Constant(0.))
+
+        self.c_gamma = nn.Sequential(
+            nn.Linear(num_features, num_features, bias_attr=False), nn.ReLU(),
+            nn.Linear(num_features, num_features, bias_attr=False))
+        self.c_beta = nn.Sequential(
+            nn.Linear(num_features, num_features, bias_attr=False), nn.ReLU(),
+            nn.Linear(num_features, num_features, bias_attr=False))
         self.s_gamma = nn.Linear(num_features, num_features, bias_attr=False)
         self.s_beta = nn.Linear(num_features, num_features, bias_attr=False)
 
     def forward(self, x, content_features, style_features):
-        content_gamma, content_beta = self.c_gamma(content_features), self.c_beta(content_features)
-        style_gamma, style_beta = self.s_gamma(style_features), self.s_beta(style_features)
+        content_gamma, content_beta = self.c_gamma(
+            content_features), self.c_beta(content_features)
+        style_gamma, style_beta = self.s_gamma(style_features), self.s_beta(
+            style_features)
 
-        w_gamma_, w_beta_ = self.w_gamma.expand([x.shape[0], -1]), self.w_beta.expand([x.shape[0], -1])
+        w_gamma_, w_beta_ = self.w_gamma.expand(
+            [x.shape[0], -1]), self.w_beta.expand([x.shape[0], -1])
         soft_gamma = (1. - w_gamma_) * style_gamma + w_gamma_ * content_gamma
         soft_beta = (1. - w_beta_) * style_beta + w_beta_ * content_beta
@@ -345,16 +380,25 @@ class AdaLIN(nn.Layer):
     def __init__(self, num_features, eps=1e-5):
         super(AdaLIN, self).__init__()
         self.eps = eps
-        self.rho = self.create_parameter([1, num_features, 1, 1], default_initializer=nn.initializer.Constant(0.9))
+        self.rho = self.create_parameter(
+            [1, num_features, 1, 1],
+            default_initializer=nn.initializer.Constant(0.9))
 
     def forward(self, x, gamma, beta):
-        in_mean, in_var = paddle.mean(x, axis=[2, 3], keepdim=True), paddle.var(x, axis=[2, 3], keepdim=True)
+        in_mean, in_var = paddle.mean(x, axis=[2, 3],
+                                      keepdim=True), paddle.var(x,
+                                                                axis=[2, 3],
+                                                                keepdim=True)
         out_in = (x - in_mean) / paddle.sqrt(in_var + self.eps)
-        ln_mean, ln_var = paddle.mean(x, axis=[1, 2, 3], keepdim=True), paddle.var(x, axis=[1, 2, 3], keepdim=True)
+        ln_mean, ln_var = paddle.mean(x, axis=[1, 2, 3],
+                                      keepdim=True), paddle.var(x,
+                                                                axis=[1, 2, 3],
+                                                                keepdim=True)
         out_ln = (x - ln_mean) / paddle.sqrt(ln_var + self.eps)
         out = self.rho.expand([x.shape[0], -1, -1, -1]) * out_in + \
               (1-self.rho.expand([x.shape[0], -1, -1, -1])) * out_ln
-        out = out * gamma.unsqueeze(2).unsqueeze(3) + beta.unsqueeze(2).unsqueeze(3)
+        out = out * gamma.unsqueeze(2).unsqueeze(3) + beta.unsqueeze(
+            2).unsqueeze(3)
 
         return out
 
@@ -363,17 +407,31 @@ class LIN(nn.Layer):
     def __init__(self, num_features, eps=1e-5):
         super(LIN, self).__init__()
         self.eps = eps
-        self.rho = self.create_parameter([1, num_features, 1, 1], default_initializer=nn.initializer.Constant(0.))
-        self.gamma = self.create_parameter([1, num_features, 1, 1], default_initializer=nn.initializer.Constant(1.))
-        self.beta = self.create_parameter([1, num_features, 1, 1], default_initializer=nn.initializer.Constant(0.))
+        self.rho = self.create_parameter(
+            [1, num_features, 1, 1],
+            default_initializer=nn.initializer.Constant(0.))
+        self.gamma = self.create_parameter(
+            [1, num_features, 1, 1],
+            default_initializer=nn.initializer.Constant(1.))
+        self.beta = self.create_parameter(
+            [1, num_features, 1, 1],
+            default_initializer=nn.initializer.Constant(0.))
 
     def forward(self, x):
-        in_mean, in_var = paddle.mean(x, axis=[2, 3], keepdim=True), paddle.var(x, axis=[2, 3], keepdim=True)
+        in_mean, in_var = paddle.mean(x, axis=[2, 3],
+                                      keepdim=True), paddle.var(x,
+                                                                axis=[2, 3],
+                                                                keepdim=True)
         out_in = (x - in_mean) / paddle.sqrt(in_var + self.eps)
-        ln_mean, ln_var = paddle.mean(x, axis=[1, 2, 3], keepdim=True), paddle.var(x, axis=[1, 2, 3], keepdim=True)
+        ln_mean, ln_var = paddle.mean(x, axis=[1, 2, 3],
+                                      keepdim=True), paddle.var(x,
+                                                                axis=[1, 2, 3],
+                                                                keepdim=True)
         out_ln = (x - ln_mean) / paddle.sqrt(ln_var + self.eps)
         out = self.rho.expand([x.shape[0], -1, -1, -1]) * out_in + \
               (1-self.rho.expand([x.shape[0], -1, -1, -1])) * out_ln
-        out = out * self.gamma.expand([x.shape[0], -1, -1, -1]) + self.beta.expand([x.shape[0], -1, -1, -1])
+        out = out * self.gamma.expand([x.shape[0], -1, -1, -1
+                                       ]) + self.beta.expand(
+                                           [x.shape[0], -1, -1, -1])
 
         return out
diff --git a/ppgan/models/generators/wav2lip.py b/ppgan/models/generators/wav2lip.py
index 7c62f783462f82..5c8b0c9438a081 100644
--- a/ppgan/models/generators/wav2lip.py
+++ b/ppgan/models/generators/wav2lip.py
@@ -18,9 +18,9 @@ def __init__(self):
         self.face_encoder_blocks = nn.LayerList([
             nn.Sequential(ConvBNRelu(6, 16, kernel_size=7, stride=1,
-                                     padding=3)),  # 96,96
+                                     padding=3)),
             nn.Sequential(
-                ConvBNRelu(16, 32, kernel_size=3, stride=2, padding=1),  # 48,48
+                ConvBNRelu(16, 32, kernel_size=3, stride=2, padding=1),
                 ConvBNRelu(32,
                            32,
                            kernel_size=3,
@@ -34,7 +34,7 @@ def __init__(self):
                            padding=1,
                            residual=True)),
             nn.Sequential(
-                ConvBNRelu(32, 64, kernel_size=3, stride=2, padding=1),  # 24,24
+                ConvBNRelu(32, 64, kernel_size=3, stride=2, padding=1),
                 ConvBNRelu(64,
                            64,
                            kernel_size=3,
@@ -54,8 +54,7 @@ def __init__(self):
                            padding=1,
                            residual=True)),
             nn.Sequential(
-                ConvBNRelu(64, 128, kernel_size=3, stride=2,
-                           padding=1),  # 12,12
+                ConvBNRelu(64, 128, kernel_size=3, stride=2, padding=1),
                 ConvBNRelu(128,
                            128,
                            kernel_size=3,
@@ -69,7 +68,7 @@ def __init__(self):
                            padding=1,
                            residual=True)),
             nn.Sequential(
-                ConvBNRelu(128, 256, kernel_size=3, stride=2, padding=1),  # 6,6
+                ConvBNRelu(128, 256, kernel_size=3, stride=2, padding=1),
                 ConvBNRelu(256,
                            256,
                            kernel_size=3,
@@ -83,7 +82,7 @@ def __init__(self):
                            padding=1,
                            residual=True)),
             nn.Sequential(
-                ConvBNRelu(256, 512, kernel_size=3, stride=2, padding=1),  # 3,3
+                ConvBNRelu(256, 512, kernel_size=3, stride=2, padding=1),
                 ConvBNRelu(512,
                            512,
                            kernel_size=3,
@@ -92,8 +91,7 @@ def __init__(self):
                            residual=True),
             ),
             nn.Sequential(
-                ConvBNRelu(512, 512, kernel_size=3, stride=1,
-                           padding=0),  # 1, 1
+                ConvBNRelu(512, 512, kernel_size=3, stride=1, padding=0),
                 ConvBNRelu(512, 512, kernel_size=1, stride=1, padding=0)),
         ])
@@ -156,7 +154,7 @@ def __init__(self):
                        512,
                        kernel_size=3,
                        stride=1,
-                       padding=0),  # 3,3
+                       padding=0),
             ConvBNRelu(512,
                        512,
                        kernel_size=3,
@@ -183,7 +181,7 @@ def __init__(self):
                        stride=1,
                        padding=1,
                        residual=True),
-            ),  # 6, 6
+            ),
             nn.Sequential(
                 Conv2dTransposeRelu(768,
                                     384,
@@ -203,7 +201,7 @@ def __init__(self):
                        stride=1,
                        padding=1,
                        residual=True),
-            ),  # 12, 12
+            ),
             nn.Sequential(
                 Conv2dTransposeRelu(512,
                                     256,
@@ -223,7 +221,7 @@ def __init__(self):
                        stride=1,
                        padding=1,
                        residual=True),
-            ),  # 24, 24
+            ),
             nn.Sequential(
                 Conv2dTransposeRelu(320,
                                     128,
@@ -243,7 +241,7 @@ def __init__(self):
                        stride=1,
                        padding=1,
                        residual=True),
-            ),  # 48, 48
+            ),
             nn.Sequential(
                 Conv2dTransposeRelu(160,
                                     64,
@@ -264,14 +262,13 @@ def __init__(self):
                            padding=1,
                            residual=True),
             ),
-        ])  # 96,96
+        ])
 
         self.output_block = nn.Sequential(
             ConvBNRelu(80, 32, kernel_size=3, stride=1, padding=1),
             nn.Conv2D(32, 3, kernel_size=1, stride=1, padding=0), nn.Sigmoid())
 
     def forward(self, audio_sequences, face_sequences):
-        # audio_sequences = (B, T, 1, 80, 16)
         B = audio_sequences.shape[0]
 
         input_dim_size = len(face_sequences.shape)
@@ -285,7 +282,7 @@ def forward(self, audio_sequences, face_sequences):
         ],
                                             axis=0)
 
-        audio_embedding = self.audio_encoder(audio_sequences)  # B, 512, 1, 1
+        audio_embedding = self.audio_encoder(audio_sequences)
 
         feats = []
         x = face_sequences
@@ -308,8 +305,8 @@ def forward(self, audio_sequences, face_sequences):
         x = self.output_block(x)
 
         if input_dim_size > 4:
-            x = paddle.split(x, int(x.shape[0] / B), axis=0)  # [(B, C, H, W)]
-            outputs = paddle.stack(x, axis=2)  # (B, C, T, H, W)
+            x = paddle.split(x, int(x.shape[0] / B), axis=0)
+            outputs = paddle.stack(x, axis=2)
         else:
             outputs = x
 
diff --git a/ppgan/models/mpr_model.py b/ppgan/models/mpr_model.py
index 1c2c7cb28a9c66..d88e8f11f441c9 100644
--- a/ppgan/models/mpr_model.py
+++ b/ppgan/models/mpr_model.py
@@ -77,12 +77,3 @@ def train_iter(self, optims=None):
     def forward(self):
         """Run forward pass; called by both functions <forward> and <test>."""
         pass
-
-
-def init_edvr_weight(net):
-    def reset_func(m):
-        if hasattr(m, 'weight') and (not isinstance(
-                m, (nn.BatchNorm, nn.BatchNorm2D))):
-            reset_parameters(m)
-
-    net.apply(reset_func)
diff --git a/ppgan/models/starganv2_model.py b/ppgan/models/starganv2_model.py
index 0598579694e369..f7d5e5e6aed068 100755
--- a/ppgan/models/starganv2_model.py
+++ b/ppgan/models/starganv2_model.py
@@ -1,3 +1,7 @@
+# code was heavily based on https://github.com/clovaai/stargan-v2
+# Users should be careful about adopting these functions in any commercial matters.
+# https://github.com/clovaai/stargan-v2#license
+
 from paddle.fluid.layers.nn import soft_relu
 
 from .base_model import BaseModel
diff --git a/ppgan/modules/caffevgg.py b/ppgan/modules/caffevgg.py
index bf40f5bb30208e..b0780899ecceb8 100644
--- a/ppgan/modules/caffevgg.py
+++ b/ppgan/modules/caffevgg.py
@@ -2,6 +2,7 @@ import paddle.nn as nn
 import numpy as np
 from ppgan.utils.download import get_path_from_url
+
 model_urls = {
     'caffevgg19': ('https://paddlegan.bj.bcebos.com/models/vgg19_no_fc.npy',
                    '8ea1ef2374f8684b6cea9f300849be81')
@@ -29,10 +30,13 @@ def __init__(self, output_index: int = 26) -> None:
         self.mean = mean.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
 
     def _process(self, x):
-        rgb = (x * 0.5 + 0.5) * 255  # value to 255
+        # value to 255
+        rgb = (x * 0.5 + 0.5) * 255
+        # rgb to bgr
         bgr = paddle.stack((rgb[:, 2, :, :], rgb[:, 1, :, :], rgb[:, 0, :, :]),
-                           1)  # rgb to bgr
-        return bgr - self.mean  # vgg norm
+                           1)
+        # vgg norm
+        return bgr - self.mean
 
     def _forward_impl(self, x):
         x = self._process(x)
diff --git a/ppgan/modules/equalized.py b/ppgan/modules/equalized.py
index 7d2eef17ba6feb..2ef60e66846d65 100644
--- a/ppgan/modules/equalized.py
+++ b/ppgan/modules/equalized.py
@@ -12,6 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# code was heavily based on https://github.com/rosinality/stylegan2-pytorch
+# MIT License
+# Copyright (c) 2019 Kim Seonghyeon
+
 import math
 import paddle
 import paddle.nn as nn
diff --git a/ppgan/modules/fused_act.py b/ppgan/modules/fused_act.py
index d1bc584fc5fa0b..0bf89f00d0aac7 100644
--- a/ppgan/modules/fused_act.py
+++ b/ppgan/modules/fused_act.py
@@ -12,37 +12,40 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# code was heavily based on https://github.com/rosinality/stylegan2-pytorch
+# MIT License
+# Copyright (c) 2019 Kim Seonghyeon
+
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-
-
+
+
 class FusedLeakyReLU(nn.Layer):
-    def __init__(self, channel, bias=True, negative_slope=0.2, scale=2 ** 0.5):
+    def __init__(self, channel, bias=True, negative_slope=0.2, scale=2**0.5):
         super().__init__()
-
+
         if bias:
-            self.bias = self.create_parameter((channel,), default_initializer=nn.initializer.Constant(0.0))
-
+            self.bias = self.create_parameter(
+                (channel, ), default_initializer=nn.initializer.Constant(0.0))
+
         else:
             self.bias = None
-
+
         self.negative_slope = negative_slope
         self.scale = scale
-
+
     def forward(self, input):
-        return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale)
-
-
-def fused_leaky_relu(input, bias=None, negative_slope=0.2, scale=2 ** 0.5):
+        return fused_leaky_relu(input, self.bias, self.negative_slope,
+                                self.scale)
+
+
+def fused_leaky_relu(input, bias=None, negative_slope=0.2, scale=2**0.5):
     if bias is not None:
         rest_dim = [1] * (len(input.shape) - len(bias.shape) - 1)
-        return (
-            F.leaky_relu(
-                input + bias.reshape((1, bias.shape[0], *rest_dim)), negative_slope=0.2
-            )
-            * scale
-        )
-
+        return (F.leaky_relu(input + bias.reshape(
+            (1, bias.shape[0], *rest_dim)),
+                             negative_slope=0.2) * scale)
+
     else:
         return F.leaky_relu(input, negative_slope=0.2) * scale
diff --git a/ppgan/modules/upfirdn2d.py b/ppgan/modules/upfirdn2d.py
index ac34a889b279a1..ca5972d93cddcf 100644
--- a/ppgan/modules/upfirdn2d.py
+++ b/ppgan/modules/upfirdn2d.py
@@ -12,6 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# code was heavily based on https://github.com/rosinality/stylegan2-pytorch
+# MIT License
+# Copyright (c) 2019 Kim Seonghyeon
+
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
diff --git a/ppgan/modules/wing.py b/ppgan/modules/wing.py
index 6b583db4c7d58f..4cdc1826aef4b1 100755
--- a/ppgan/modules/wing.py
+++ b/ppgan/modules/wing.py
@@ -1,8 +1,6 @@
-"""
-StarGAN v2
-Copyright (c) 2020-present NAVER Corp.
-
-"""
+# code was heavily based on https://github.com/clovaai/stargan-v2
+# Users should be careful about adopting these functions in any commercial matters.
+# https://github.com/clovaai/stargan-v2#license
 
 from collections import namedtuple
 from copy import deepcopy
@@ -25,9 +23,16 @@ def __init__(self, num_modules, depth, num_features, first_one=False):
         self.num_modules = num_modules
         self.depth = depth
         self.features = num_features
-        self.coordconv = CoordConvTh(64, 64, True, True, 256, first_one,
+        self.coordconv = CoordConvTh(64,
+                                     64,
+                                     True,
+                                     True,
+                                     256,
+                                     first_one,
                                      out_channels=256,
-                                     kernel_size=1, stride=1, padding=0)
+                                     kernel_size=1,
+                                     stride=1,
+                                     padding=0)
         self._generate_network(self.depth)
 
     def _generate_network(self, level):
@@ -68,14 +73,19 @@ def __init__(self, height=64, width=64, with_r=False, with_boundary=False):
         self.with_boundary = with_boundary
 
         with paddle.no_grad():
-            x_coords = paddle.arange(height).unsqueeze(1).expand((height, width)).astype('float32')
-            y_coords = paddle.arange(width).unsqueeze(0).expand((height, width)).astype('float32')
+            x_coords = paddle.arange(height).unsqueeze(1).expand(
+                (height, width)).astype('float32')
+            y_coords = paddle.arange(width).unsqueeze(0).expand(
+                (height, width)).astype('float32')
             x_coords = (x_coords / (height - 1)) * 2 - 1
             y_coords = (y_coords / (width - 1)) * 2 - 1
-            coords = paddle.stack([x_coords, y_coords], axis=0)  # (2, height, width)
+            coords = paddle.stack([x_coords, y_coords],
+                                  axis=0)  # (2, height, width)
 
             if self.with_r:
-                rr = paddle.sqrt(paddle.pow(x_coords, 2) + paddle.pow(y_coords, 2))  # (height, width)
+                rr = paddle.sqrt(
+                    paddle.pow(x_coords, 2) +
+                    paddle.pow(y_coords, 2))  # (height, width)
                 rr = (rr / paddle.max(rr)).unsqueeze(0)
                 coords = paddle.concat([coords, rr], axis=0)
 
@@ -92,9 +102,12 @@ def forward(self, x, heatmap=None):
         if self.with_boundary and heatmap is not None:
             boundary_channel = paddle.clip(heatmap[:, -1:, :, :], 0.0, 1.0)
             zero_tensor = paddle.zeros_like(self.x_coords)
-            xx_boundary_channel = paddle.where(boundary_channel > 0.05, self.x_coords, zero_tensor)
-            yy_boundary_channel = paddle.where(boundary_channel > 0.05, self.y_coords, zero_tensor)
-            coords = paddle.concat([coords, xx_boundary_channel, yy_boundary_channel], axis=1)
+            xx_boundary_channel = paddle.where(boundary_channel > 0.05,
+                                               self.x_coords, zero_tensor)
+            yy_boundary_channel = paddle.where(boundary_channel > 0.05,
+                                               self.y_coords, zero_tensor)
+            coords = paddle.concat(
+                [coords, xx_boundary_channel, yy_boundary_channel], axis=1)
 
         x_and_coords = paddle.concat([x, coords], axis=1)
         return x_and_coords
@@ -102,8 +115,15 @@ def forward(self, x, heatmap=None):
 
 class CoordConvTh(nn.Layer):
     """CoordConv layer as in the paper."""
-    def __init__(self, height, width, with_r, with_boundary,
-                 in_channels, first_one=False, *args, **kwargs):
+    def __init__(self,
+                 height,
+                 width,
+                 with_r,
+                 with_boundary,
+                 in_channels,
+                 first_one=False,
+                 *args,
+                 **kwargs):
         super(CoordConvTh, self).__init__()
         self.addcoords = AddCoordsTh(height, width, with_r, with_boundary)
         in_channels += 2
@@ -124,7 +144,12 @@ class ConvBlock(nn.Layer):
     def __init__(self, in_planes, out_planes):
         super(ConvBlock, self).__init__()
         self.bn1 = nn.BatchNorm2D(in_planes)
-        conv3x3 = partial(nn.Conv2D, kernel_size=3, stride=1, padding=1, bias_attr=False, dilation=1)
+        conv3x3 = partial(nn.Conv2D,
+                          kernel_size=3,
+                          stride=1,
+                          padding=1,
+                          bias_attr=False,
+                          dilation=1)
         self.conv1 = conv3x3(in_planes, int(out_planes / 2))
         self.bn2 = nn.BatchNorm2D(int(out_planes / 2))
         self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4))
@@ -133,9 +158,9 @@ def __init__(self, in_planes, out_planes):
 
         self.downsample = None
         if in_planes != out_planes:
-            self.downsample = nn.Sequential(nn.BatchNorm2D(in_planes),
-                                            nn.ReLU(True),
-                                            nn.Conv2D(in_planes, out_planes, 1, 1, bias_attr=False))
+            self.downsample = nn.Sequential(
+                nn.BatchNorm2D(in_planes), nn.ReLU(True),
+                nn.Conv2D(in_planes, out_planes, 1, 1, bias_attr=False))
 
     def forward(self, x):
         residual = x
@@ -168,7 +193,7 @@ def normalize(x, eps=1e-6):
     """Apply min-max normalization."""
     # x = x.contiguous()
     N, C, H, W = x.shape
-    x_ = paddle.reshape(x, (N*C, -1))
+    x_ = paddle.reshape(x, (N * C, -1))
     max_val = paddle.max(x_, axis=1, keepdim=True)[0]
     min_val = paddle.min(x_, axis=1, keepdim=True)[0]
     x_ = (x_ - min_val) / (max_val - min_val + eps)
@@ -193,14 +218,14 @@ def shift(x, N):
     N = abs(N)
     _, _, H, W = x.shape
     head = np.arange(N)
-    tail = np.arange(H-N)
+    tail = np.arange(H - N)
 
     if up:
-        head = np.arange(H-N)+N
+        head = np.arange(H - N) + N
         tail = np.arange(N)
     else:
-        head = np.arange(N) + (H-N)
-        tail = np.arange(H-N)
+        head = np.arange(N) + (H - N)
+        tail = np.arange(H - N)
 
     # permutation indices
     perm = np.concatenate([head, tail])
@@ -231,29 +256,31 @@ def preprocess(x):
     sw = H // 256
 
     operations = Munch(chin=OPPAIR(0, 3),
-                       eyebrows=OPPAIR(-7*sw, 2),
-                       nostrils=OPPAIR(8*sw, 4),
-                       lipupper=OPPAIR(-8*sw, 4),
-                       liplower=OPPAIR(8*sw, 4),
-                       lipinner=OPPAIR(-2*sw, 3))
+                       eyebrows=OPPAIR(-7 * sw, 2),
+                       nostrils=OPPAIR(8 * sw, 4),
+                       lipupper=OPPAIR(-8 * sw, 4),
+                       liplower=OPPAIR(8 * sw, 4),
+                       lipinner=OPPAIR(-2 * sw, 3))
 
     for part, ops in operations.items():
         start, end = index_map[part]
         x[:, start:end] = resize(shift(x[:, start:end], ops.shift), ops.resize)
 
-    zero_out = paddle.concat([paddle.arange(0, index_map.chin.start),
-                              paddle.arange(index_map.chin.end, 33),
-                              paddle.to_tensor([index_map.eyebrowsedges.start,
-                                                index_map.eyebrowsedges.end,
-                                                index_map.lipedges.start,
-                                                index_map.lipedges.end])])
+    zero_out = paddle.concat([
+        paddle.arange(0, index_map.chin.start),
+        paddle.arange(index_map.chin.end, 33),
+        paddle.to_tensor([
+            index_map.eyebrowsedges.start, index_map.eyebrowsedges.end,
+            index_map.lipedges.start, index_map.lipedges.end
+        ])
+    ])
     x = x.numpy()
     zero_out = zero_out.numpy()
     x[:, zero_out] = 0
     x = paddle.to_tensor(x)
 
     start, end = index_map.nose
-    x[:, start+1:end] = shift(x[:, start+1:end], 4*sw)
+    x[:, start + 1:end] = shift(x[:, start + 1:end], 4 * sw)
     x[:, start:end] = resize(x[:, start:end], 1)
 
     start, end = index_map.eyes
@@ -264,8 +291,10 @@ def preprocess(x):
 
     # Second-level mask
     x2 = deepcopy(x)
     x2[:, index_map.chin.start:index_map.chin.end] = 0  # start:end was 0:33
-    x2[:, index_map.lipedges.start:index_map.lipinner.end] = 0  # start:end was 76:96
-    x2[:, index_map.eyebrows.start:index_map.eyebrows.end] = 0  # start:end was 33:51
+    x2[:, index_map.lipedges.start:index_map.lipinner.
+       end] = 0  # start:end was 76:96
+    x2[:, index_map.eyebrows.
+       end] = 0  # start:end was 33:51
 
     x = paddle.sum(x, axis=1, keepdim=True)  # (N, 1, H, W)
     x2 = paddle.sum(x2, axis=1, keepdim=True)  # mask without faceline and mouth
diff --git a/ppgan/utils/visual.py b/ppgan/utils/visual.py
index dccb7a9d62f8fc..6982634661b896 100644
--- a/ppgan/utils/visual.py
+++ b/ppgan/utils/visual.py
@@ -45,18 +45,23 @@ def make_grid(tensor, nrow=8, normalize=False, range=None, scale_each=False):
     if isinstance(tensor, list):
         tensor = paddle.stack(tensor, 0)
 
-    if tensor.dim() == 2:  # single image H x W
+    # single image H x W
+    if tensor.dim() == 2:
         tensor = tensor.unsqueeze(0)
-    if tensor.dim() == 3:  # single image
-        if tensor.shape[0] == 1:  # if single-channel, convert to 3-channel
+    # single image
+    if tensor.dim() == 3:
+        # if single-channel, convert to 3-channel
+        if tensor.shape[0] == 1:
             tensor = paddle.concat([tensor, tensor, tensor], 0)
         tensor = tensor.unsqueeze(0)
 
-    if tensor.dim() == 4 and tensor.shape[1] == 1:  # single-channel images
+    # single-channel images
+    if tensor.dim() == 4 and tensor.shape[1] == 1:
         tensor = paddle.concat([tensor, tensor, tensor], 1)
 
     if normalize is True:
-        tensor = tensor.astype(tensor.dtype)  # avoid modifying tensor in-place
+        # avoid modifying tensor in-place
+        tensor = tensor.astype(tensor.dtype)
         if range is not None:
             assert isinstance(range, tuple), \
                 "range has to be a tuple (min, max) if specified. min and max are numbers"
@@ -72,7 +77,8 @@ def norm_range(t, range):
             norm_ip(t, float(t.min()), float(t.max()))
 
     if scale_each is True:
-        for t in tensor:  # loop over mini-batch dimension
+        # loop over mini-batch dimension
+        for t in tensor:
             norm_range(t, range)
     else:
         norm_range(tensor, range)
@@ -103,27 +109,31 @@ def tensor2img(input_image, min_max=(-1., 1.), image_num=1, imtype=np.uint8):
     """Converts a Tensor array into a numpy image array.
 
     Parameters:
-        input_image (tensor) -- the input image tensor array
-        image_num (int) -- the convert iamge numbers
-        imtype (type) -- the desired type of the converted numpy array
+        input_image (tensor): the input image tensor array
+        image_num (int): the number of images to convert
+        imtype (type): the desired type of the converted numpy array
     """
     def processing(img, transpose=True):
         """Processing one numpy image.
 
         Parameters:
-            im (tensor) -- the input image numpy array
+            img (tensor): the input image numpy array
         """
-        if img.shape[0] == 1:  # grayscale to RGB
+        # grayscale to RGB
+        if img.shape[0] == 1:
             img = np.tile(img, (3, 1, 1))
         img = img.clip(min_max[0], min_max[1])
         img = (img - min_max[0]) / (min_max[1] - min_max[0])
         if imtype == np.uint8:
-            img = img * 255.0  # scaling
-            img = np.transpose(img, (1, 2, 0)) if transpose else img  # tranpose
+            # scaling
+            img = img * 255.0
+            # transpose
+            img = np.transpose(img, (1, 2, 0)) if transpose else img
         return img
 
     if not isinstance(input_image, np.ndarray):
-        image_numpy = input_image.numpy()  # convert it into a numpy array
+        # convert it into a numpy array
+        image_numpy = input_image.numpy()
         ndim = image_numpy.ndim
         if ndim == 4:
             image_numpy = image_numpy[0:image_num]
@@ -144,7 +154,8 @@ def processing(img, transpose=True):
         image_numpy = np.stack(
             [processing(im, transpose=False) for im in image_numpy])
-    else:  # if it is a numpy array, do nothing
+    else:
+        # if it is a numpy array, do nothing
         image_numpy = input_image
     image_numpy = image_numpy.round()
     return image_numpy.astype(imtype)
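
Supplementary note (not part of the patch): two of the routines reformatted above are easier to review with their math restated in isolation. First, the AdaLIN forward pass: instance-norm and layer-norm statistics are blended by a learnable rho, then a style-derived affine transform (gamma, beta) is applied. A minimal standalone sketch, assuming Paddle 2.x; the shapes and values below are illustrative only, not taken from the patch:

    import paddle

    N, C, H, W = 2, 8, 4, 4
    eps = 1e-5
    x = paddle.rand([N, C, H, W])
    gamma = paddle.rand([N, C])           # produced from style features in SoftAdaLIN
    beta = paddle.rand([N, C])
    rho = paddle.full([1, C, 1, 1], 0.9)  # learnable parameter in AdaLIN

    # Instance-norm statistics: per sample, per channel.
    in_mean = paddle.mean(x, axis=[2, 3], keepdim=True)
    in_var = paddle.var(x, axis=[2, 3], keepdim=True)
    out_in = (x - in_mean) / paddle.sqrt(in_var + eps)

    # Layer-norm statistics: per sample, across all channels.
    ln_mean = paddle.mean(x, axis=[1, 2, 3], keepdim=True)
    ln_var = paddle.var(x, axis=[1, 2, 3], keepdim=True)
    out_ln = (x - ln_mean) / paddle.sqrt(ln_var + eps)

    # rho near 1 favours instance norm; rho near 0 favours layer norm.
    rho_ = rho.expand([N, -1, -1, -1])
    out = rho_ * out_in + (1. - rho_) * out_ln
    out = out * gamma.unsqueeze(2).unsqueeze(3) + beta.unsqueeze(2).unsqueeze(3)
    print(out.shape)  # [2, 8, 4, 4]

Second, fused_leaky_relu in ppgan/modules/fused_act.py: the bias is reshaped so it broadcasts over every axis after the channel axis, and the activation output is scaled by a constant gain (sqrt(2) by default). A sketch under the same assumptions; fused_leaky_relu_ref is a hypothetical name for illustration, and unlike the patched function it passes negative_slope through rather than pinning it to 0.2:

    import paddle
    import paddle.nn.functional as F

    def fused_leaky_relu_ref(x, bias, negative_slope=0.2, scale=2**0.5):
        # Reshape bias from (C,) to (1, C, 1, ..., 1) so it broadcasts
        # across the batch and spatial axes, then activate and apply the gain.
        rest_dim = [1] * (len(x.shape) - len(bias.shape) - 1)
        return F.leaky_relu(x + bias.reshape((1, bias.shape[0], *rest_dim)),
                            negative_slope=negative_slope) * scale

    x = paddle.rand([2, 8, 4, 4])
    bias = paddle.zeros([8])
    print(fused_leaky_relu_ref(x, bias).shape)  # [2, 8, 4, 4]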