diff --git a/ppgan/apps/styleganv2fitting_predictor.py b/ppgan/apps/styleganv2fitting_predictor.py index 5c680d0bf40360..930a94a610e072 100644 --- a/ppgan/apps/styleganv2fitting_predictor.py +++ b/ppgan/apps/styleganv2fitting_predictor.py @@ -38,14 +38,12 @@ def make_image(tensor): class StyleGANv2FittingPredictor(StyleGANv2Predictor): - def run( - self, + def run(self, image, need_align=False, start_lr=0.1, final_lr=0.025, - latent_level=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11], # for ffhq (0~17) + latent_level=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], step=100, mse_weight=1, pre_latent=None): diff --git a/ppgan/datasets/animeganv2_dataset.py b/ppgan/datasets/animeganv2_dataset.py index 66a58ff7778c5a..7f70ef8bfdd18c 100644 --- a/ppgan/datasets/animeganv2_dataset.py +++ b/ppgan/datasets/animeganv2_dataset.py @@ -36,9 +36,9 @@ def __init__(self, """Initialize this dataset class. Args: - cfg (dict) -- stores all the experiment flags + dataroot (dict): Directory of dataset. + """ - # self.cfg = cfg self.root = dataroot self.style = style diff --git a/ppgan/datasets/firstorder_dataset.py b/ppgan/datasets/firstorder_dataset.py index f41733a04d9df8..5097273ce607fc 100755 --- a/ppgan/datasets/firstorder_dataset.py +++ b/ppgan/datasets/firstorder_dataset.py @@ -239,8 +239,7 @@ def __getitem__(self, idx): out['driving'] = out['source'] out['source'] = buf else: - video = np.stack(video_array, axis=0).astype( - np.float32) / 255.0 + video = np.stack(video_array, axis=0).astype(np.float32) / 255.0 out['video'] = video.transpose(3, 0, 1, 2) out['name'] = video_name return out diff --git a/ppgan/datasets/mpr_dataset.py b/ppgan/datasets/mpr_dataset.py index 8c243cbd4d4a2a..fad866dabeeecd 100644 --- a/ppgan/datasets/mpr_dataset.py +++ b/ppgan/datasets/mpr_dataset.py @@ -1,16 +1,6 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# code was heavily based on https://github.com/swz30/MPRNet +# Users should be careful about adopting these functions in any commercial matters. +# https://github.com/swz30/MPRNet/blob/main/LICENSE.md import os import random diff --git a/ppgan/datasets/starganv2_dataset.py b/ppgan/datasets/starganv2_dataset.py index cd1621e07cef52..0985b13c62362e 100755 --- a/ppgan/datasets/starganv2_dataset.py +++ b/ppgan/datasets/starganv2_dataset.py @@ -1,3 +1,6 @@ +# code was heavily based on https://github.com/clovaai/stargan-v2 +# Users should be careful about adopting these functions in any commercial matters. +# https://github.com/clovaai/stargan-v2#license import paddle from .base_dataset import BaseDataset @@ -14,8 +17,11 @@ def listdir(dname): - fnames = list(chain(*[list(Path(dname).rglob('*.' + ext)) - for ext in ['png', 'jpg', 'jpeg', 'JPG']])) + fnames = list( + chain(*[ + list(Path(dname).rglob('*.' 
+ ext)) + for ext in ['png', 'jpg', 'jpeg', 'JPG'] + ])) return fnames @@ -97,7 +103,6 @@ def __len__(self): return len(self.targets) - @DATASETS.register() class StarGANv2Dataset(BaseDataset): """ @@ -120,15 +125,16 @@ def __init__(self, dataroot, is_train, preprocess, test_count=0): else: files = os.listdir(self.dataroot) if 'src' in files and 'ref' in files: - self.src_loader = ImageFolder(os.path.join(self.dataroot, 'src')) - self.ref_loader = ImageFolder(os.path.join(self.dataroot, 'ref')) + self.src_loader = ImageFolder(os.path.join( + self.dataroot, 'src')) + self.ref_loader = ImageFolder(os.path.join( + self.dataroot, 'ref')) else: self.src_loader = ImageFolder(self.dataroot) self.ref_loader = ImageFolder(self.dataroot) self.counts = min(test_count, len(self.src_loader)) self.counts = min(self.counts, len(self.ref_loader)) - def _fetch_inputs(self): try: x, y = next(self.iter_src) @@ -136,7 +142,7 @@ def _fetch_inputs(self): self.iter_src = iter(self.src_loader) x, y = next(self.iter_src) return x, y - + def _fetch_refs(self): try: x, x2, y = next(self.iter_ref) @@ -165,7 +171,7 @@ def __getitem__(self, idx): 'ref_path': x_ref, 'ref_cls': y_ref, } - + if hasattr(self, 'preprocess') and self.preprocess: datas = self.preprocess(datas) @@ -173,6 +179,6 @@ def __getitem__(self, idx): def __len__(self): return self.counts - + def prepare_data_infos(self, dataroot): pass diff --git a/ppgan/models/animeganv2_model.py b/ppgan/models/animeganv2_model.py index 7bceb36c70d0ef..c2ee5de2e6bbec 100644 --- a/ppgan/models/animeganv2_model.py +++ b/ppgan/models/animeganv2_model.py @@ -27,6 +27,8 @@ @MODELS.register() class AnimeGANV2Model(BaseModel): + """ This class implements the AnimeGANV2 model. + """ def __init__(self, generator, discriminator=None, @@ -40,8 +42,10 @@ def __init__(self, tv_weight=1.): """Initialize the AnimeGANV2 class. - Parameters: - opt (config dict)-- stores all the experiment flags; needs to be a subclass of Dict + Args: + generator (dict): config of generator. + discriminator (dict): config of discriminator. + gan_criterion (dict): config of gan criterion. """ super(AnimeGANV2Model, self).__init__() self.g_adv_weight = g_adv_weight @@ -54,7 +58,7 @@ def __init__(self, self.nets['netG'] = build_generator(generator) init_weights(self.nets['netG']) - # define a discriminator; conditional GANs need to take both input and output images; Therefore, #channels for D is input_nc + output_nc + # define a discriminator if self.is_train: self.nets['netD'] = build_discriminator(discriminator) init_weights(self.nets['netD']) @@ -87,14 +91,14 @@ def setup_input(self, input): def forward(self): """Run forward pass; called by both functions and .""" - self.fake = self.nets['netG'](self.real) # G(A) + self.fake = self.nets['netG'](self.real) # put items to visual dict self.visual_items['real'] = self.real self.visual_items['fake'] = self.fake def test(self): - self.fake = self.nets['netG'](self.real) # G(A) + self.fake = self.nets['netG'](self.real) # put items to visual dict self.visual_items['real'] = self.real diff --git a/ppgan/models/discriminators/dcdiscriminator.py b/ppgan/models/discriminators/dcdiscriminator.py index f66b49a84ebca5..cd964cf91564ad 100644 --- a/ppgan/models/discriminators/dcdiscriminator.py +++ b/ppgan/models/discriminators/dcdiscriminator.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# code was heavily based on https://github.com/aidotse/Team-Haste +# MIT License +# Copyright (c) 2020 AI Sweden + import paddle import functools import numpy as np @@ -31,15 +35,14 @@ def __init__(self, input_nc, ndf=64, norm_type='instance'): """Construct a DCGAN discriminator Parameters: - input_nc (int) -- the number of channels in input images - ndf (int) -- the number of filters in the last conv layer - norm_type (str) -- normalization layer type + input_nc (int): the number of channels in input images + ndf (int): the number of filters in the last conv layer + norm_type (str): normalization layer type """ super(DCDiscriminator, self).__init__() norm_layer = build_norm_layer(norm_type) - if type( - norm_layer - ) == functools.partial: # no need to use bias as BatchNorm2d has affine parameters + if type(norm_layer) == functools.partial: + # no need to use bias as BatchNorm2d has affine parameters use_bias = norm_layer.func == nn.BatchNorm2D else: use_bias = norm_layer == nn.BatchNorm2D @@ -48,29 +51,30 @@ def __init__(self, input_nc, ndf=64, norm_type='instance'): padw = 1 sequence = [ - nn.Conv2D(input_nc, - ndf, - kernel_size=kw, - stride=2, - padding=padw, - bias_attr=use_bias), - nn.LeakyReLU(0.2) - ] + nn.Conv2D(input_nc, + ndf, + kernel_size=kw, + stride=2, + padding=padw, + bias_attr=use_bias), + nn.LeakyReLU(0.2) + ] nf_mult = 1 nf_mult_prev = 1 n_downsampling = 4 - for n in range(1, n_downsampling): # gradually increase the number of filters + # gradually increase the number of filters + for n in range(1, n_downsampling): nf_mult_prev = nf_mult nf_mult = min(2**n, 8) if norm_type == 'batch': sequence += [ nn.Conv2D(ndf * nf_mult_prev, - ndf * nf_mult, - kernel_size=kw, - stride=2, - padding=padw), + ndf * nf_mult, + kernel_size=kw, + stride=2, + padding=padw), BatchNorm2D(ndf * nf_mult), nn.LeakyReLU(0.2) ] @@ -88,13 +92,14 @@ def __init__(self, input_nc, ndf=64, norm_type='instance'): nf_mult_prev = nf_mult + # output 1 channel prediction map sequence += [ - nn.Conv2D(ndf * nf_mult_prev, - 1, - kernel_size=kw, - stride=1, - padding=0) - ] # output 1 channel prediction map + nn.Conv2D(ndf * nf_mult_prev, + 1, + kernel_size=kw, + stride=1, + padding=0) + ] self.model = nn.Sequential(*sequence) diff --git a/ppgan/models/discriminators/discriminator_animegan.py b/ppgan/models/discriminators/discriminator_animegan.py index d0c7badea1dabe..c06ad72f7d8aa3 100644 --- a/ppgan/models/discriminators/discriminator_animegan.py +++ b/ppgan/models/discriminators/discriminator_animegan.py @@ -1,16 +1,6 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# code was heavily based on https://github.com/TachibanaYoshino/AnimeGANv2 +# Users should be careful about adopting these functions in any commercial matters. 
+# https://github.com/TachibanaYoshino/AnimeGANv2#license import paddle.nn as nn import paddle.nn.functional as F diff --git a/ppgan/models/discriminators/discriminator_starganv2.py b/ppgan/models/discriminators/discriminator_starganv2.py index a2ff50eb0fc2a1..4525d4b95d93fa 100644 --- a/ppgan/models/discriminators/discriminator_starganv2.py +++ b/ppgan/models/discriminators/discriminator_starganv2.py @@ -1,3 +1,6 @@ +# code was heavily based on https://github.com/clovaai/stargan-v2 +# Users should be careful about adopting these functions in any commercial matters. +# https://github.com/clovaai/stargan-v2#license import paddle.nn as nn import paddle @@ -18,7 +21,7 @@ def __init__(self, img_size=256, num_domains=2, max_conv_dim=512): repeat_num = int(np.log2(img_size)) - 2 for _ in range(repeat_num): - dim_out = min(dim_in*2, max_conv_dim) + dim_out = min(dim_in * 2, max_conv_dim) blocks += [ResBlk(dim_in, dim_out, downsample=True)] dim_in = dim_out diff --git a/ppgan/models/discriminators/discriminator_styleganv2.py b/ppgan/models/discriminators/discriminator_styleganv2.py index 038d39ab5f2437..8acea70ede149f 100644 --- a/ppgan/models/discriminators/discriminator_styleganv2.py +++ b/ppgan/models/discriminators/discriminator_styleganv2.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +# code was heavily based on https://github.com/rosinality/stylegan2-pytorch +# MIT License +# Copyright (c) 2019 Kim Seonghyeon + import math import paddle import paddle.nn as nn diff --git a/ppgan/models/firstorder_model.py b/ppgan/models/firstorder_model.py index a8a59f659cc4c2..d1d4e81600d356 100755 --- a/ppgan/models/firstorder_model.py +++ b/ppgan/models/firstorder_model.py @@ -13,6 +13,8 @@ # limitations under the License. # code was heavily based on https://github.com/AliaksandrSiarohin/first-order-model +# Users should be careful about adopting these functions in any commercial matters. +# https://github.com/AliaksandrSiarohin/first-order-model/blob/master/LICENSE.md import paddle diff --git a/ppgan/models/generators/dcgenerator.py b/ppgan/models/generators/dcgenerator.py index 5bbdbb33e84358..abbc633b9cbbad 100644 --- a/ppgan/models/generators/dcgenerator.py +++ b/ppgan/models/generators/dcgenerator.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +# code was heavily based on https://github.com/aidotse/Team-Haste +# MIT License +# Copyright (c) 2020 AI Sweden + import paddle import paddle.nn as nn import functools @@ -26,7 +30,6 @@ class DCGenerator(nn.Layer): """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations. 
- code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style) """ def __init__(self, input_nz, @@ -38,12 +41,12 @@ def __init__(self, """Construct a DCGenerator generator Args: - input_nz (int) -- the number of dimension in input noise - input_nc (int) -- the number of channels in input images - output_nc (int) -- the number of channels in output images - ngf (int) -- the number of filters in the last conv layer - norm_layer -- normalization layer - padding_type (str) -- the name of padding layer in conv layers: reflect | replicate | zero + input_nz (int): the number of dimension in input noise + input_nc (int): the number of channels in input images + output_nc (int): the number of channels in output images + ngf (int): the number of filters in the last conv layer + norm_layer: normalization layer + padding_type (str): the name of padding layer in conv layers: reflect | replicate | zero """ super(DCGenerator, self).__init__() @@ -59,65 +62,66 @@ def __init__(self, if norm_type == 'batch': model = [ nn.Conv2DTranspose(input_nz, - ngf * mult, - kernel_size=4, - stride=1, - padding=0, - bias_attr=use_bias), + ngf * mult, + kernel_size=4, + stride=1, + padding=0, + bias_attr=use_bias), BatchNorm2D(ngf * mult), nn.ReLU() ] else: model = [ nn.Conv2DTranspose(input_nz, - ngf * mult, - kernel_size=4, - stride=1, - padding=0, - bias_attr=use_bias), + ngf * mult, + kernel_size=4, + stride=1, + padding=0, + bias_attr=use_bias), norm_layer(ngf * mult), nn.ReLU() ] - for i in range(1,n_downsampling): # add upsampling layers + # add upsampling layers + for i in range(1, n_downsampling): mult = 2**(n_downsampling - i) - output_size = 2**(i+2) + output_size = 2**(i + 2) if norm_type == 'batch': model += [ - nn.Conv2DTranspose(ngf * mult, - ngf * mult//2, - kernel_size=4, - stride=2, - padding=1, - bias_attr=use_bias), - BatchNorm2D(ngf * mult//2), - nn.ReLU() - ] + nn.Conv2DTranspose(ngf * mult, + ngf * mult // 2, + kernel_size=4, + stride=2, + padding=1, + bias_attr=use_bias), + BatchNorm2D(ngf * mult // 2), + nn.ReLU() + ] else: model += [ nn.Conv2DTranspose(ngf * mult, - int(ngf * mult//2), - kernel_size=4, - stride=2, - padding=1, - bias_attr=use_bias), + int(ngf * mult // 2), + kernel_size=4, + stride=2, + padding=1, + bias_attr=use_bias), norm_layer(int(ngf * mult // 2)), nn.ReLU() ] output_size = 2**(6) model += [ - nn.Conv2DTranspose(ngf , - output_nc, - kernel_size=4, - stride=2, - padding=1, - bias_attr=use_bias), - nn.Tanh() - ] + nn.Conv2DTranspose(ngf, + output_nc, + kernel_size=4, + stride=2, + padding=1, + bias_attr=use_bias), + nn.Tanh() + ] self.model = nn.Sequential(*model) def forward(self, x): """Standard forward""" - return self.model(x) \ No newline at end of file + return self.model(x) diff --git a/ppgan/models/generators/deep_conv.py b/ppgan/models/generators/deep_conv.py index 9712c9f6b1c505..0e757cbecbe8c7 100644 --- a/ppgan/models/generators/deep_conv.py +++ b/ppgan/models/generators/deep_conv.py @@ -21,32 +21,33 @@ @GENERATORS.register() class DeepConvGenerator(nn.Layer): - """Create a Deep Convolutional generator""" + """Create a Deep Convolutional generator + Refer to https://arxiv.org/abs/1511.06434 + """ def __init__(self, latent_dim, output_nc, size=64, ngf=64): """Construct a Deep Convolutional generator Args: - latent_dim (int) -- the number of latent dimension - output_nc (int) -- the number of channels in output images - size (int) -- size of output tensor - ngf (int) -- the number of filters in the last conv layer 
- - Refer to https://arxiv.org/abs/1511.06434 + latent_dim (int): the number of latent dimension + output_nc (int): the number of channels in output images + size (int): size of output tensor + ngf (int): the number of filters in the last conv layer """ super(DeepConvGenerator, self).__init__() self.latent_dim = latent_dim self.ngf = ngf self.init_size = size // 4 - self.l1 = nn.Sequential(nn.Linear(latent_dim, ngf*2 * self.init_size ** 2)) + self.l1 = nn.Sequential( + nn.Linear(latent_dim, ngf * 2 * self.init_size**2)) self.conv_blocks = nn.Sequential( - nn.BatchNorm2D(ngf*2), + nn.BatchNorm2D(ngf * 2), nn.Upsample(scale_factor=2), - nn.Conv2D(ngf*2, ngf*2, 3, stride=1, padding=1), - nn.BatchNorm2D(ngf*2, 0.2), + nn.Conv2D(ngf * 2, ngf * 2, 3, stride=1, padding=1), + nn.BatchNorm2D(ngf * 2, 0.2), nn.LeakyReLU(0.2), nn.Upsample(scale_factor=2), - nn.Conv2D(ngf*2, ngf, 3, stride=1, padding=1), + nn.Conv2D(ngf * 2, ngf, 3, stride=1, padding=1), nn.BatchNorm2D(ngf, 0.2), nn.LeakyReLU(0.2), nn.Conv2D(ngf, output_nc, 3, stride=1, padding=1), @@ -55,24 +56,36 @@ def __init__(self, latent_dim, output_nc, size=64, ngf=64): def random_inputs(self, batch_size): return paddle.randn([batch_size, self.latent_dim]) - + def forward(self, z): out = self.l1(z) - out = out.reshape([out.shape[0], self.ngf * 2, self.init_size, self.init_size]) + out = out.reshape( + [out.shape[0], self.ngf * 2, self.init_size, self.init_size]) img = self.conv_blocks(out) return img @GENERATORS.register() class ConditionalDeepConvGenerator(DeepConvGenerator): + """Create a Conditional Deep Convolutional generator + """ def __init__(self, latent_dim, output_nc, n_class=10, **kwargs): - super(ConditionalDeepConvGenerator, self).__init__(latent_dim + n_class, output_nc, **kwargs) + """Construct a Conditional Deep Convolutional generator + Args: + latent_dim (int): the number of latent dimension + output_nc (int): the number of channels in output images + n_class (int): the number of class + """ + super(ConditionalDeepConvGenerator, + self).__init__(latent_dim + n_class, output_nc, **kwargs) self.n_class = n_class self.latent_dim = latent_dim - + def random_inputs(self, batch_size): - return_list = [super(ConditionalDeepConvGenerator, self).random_inputs(batch_size)] + return_list = [ + super(ConditionalDeepConvGenerator, self).random_inputs(batch_size) + ] class_id = paddle.randint(0, self.n_class, [batch_size]) return return_list + [class_id] @@ -82,5 +95,5 @@ def forward(self, x, class_id=None): class_id = F.one_hot(class_id, self.n_class).astype('float32') class_id = class_id.reshape([x.shape[0], -1]) x = paddle.concat([x, class_id], 1) - + return super(ConditionalDeepConvGenerator, self).forward(x) diff --git a/ppgan/models/generators/generater_animegan.py b/ppgan/models/generators/generater_animegan.py index a2b09fe453e890..2d3f3aa037cc76 100644 --- a/ppgan/models/generators/generater_animegan.py +++ b/ppgan/models/generators/generater_animegan.py @@ -1,16 +1,6 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-#See the License for the specific language governing permissions and -#limitations under the License. +# code was heavily based on https://github.com/TachibanaYoshino/AnimeGANv2 +# Users should be careful about adopting these functions in any commercial matters. +# https://github.com/TachibanaYoshino/AnimeGANv2#license import paddle import paddle.nn as nn diff --git a/ppgan/models/generators/generator_pixel2style2pixel.py b/ppgan/models/generators/generator_pixel2style2pixel.py index 1651cc54c01b45..04f57ee82a04c6 100644 --- a/ppgan/models/generators/generator_pixel2style2pixel.py +++ b/ppgan/models/generators/generator_pixel2style2pixel.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +# code was heavily based on https://github.com/eladrich/pixel2style2pixel +# MIT License +# Copyright (c) 2020 Elad Richardson, Yuval Alaluf + import math import numpy as np import paddle @@ -41,7 +45,8 @@ class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])): def get_block(in_channel, depth, num_units, stride=2): - return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)] + return [Bottleneck(in_channel, depth, stride) + ] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)] def get_blocks(num_layers): @@ -67,7 +72,9 @@ def get_blocks(num_layers): get_block(in_channel=256, depth=512, num_units=3) ] else: - raise ValueError("Invalid number of layers: {}. Must be one of [50, 100, 152]".format(num_layers)) + raise ValueError( + "Invalid number of layers: {}. Must be one of [50, 100, 152]". + format(num_layers)) return blocks @@ -75,9 +82,17 @@ class SEModule(nn.Layer): def __init__(self, channels, reduction): super(SEModule, self).__init__() self.avg_pool = nn.AdaptiveAvgPool2D(1) - self.fc1 = nn.Conv2D(channels, channels // reduction, kernel_size=1, padding=0, bias_attr=False) + self.fc1 = nn.Conv2D(channels, + channels // reduction, + kernel_size=1, + padding=0, + bias_attr=False) self.relu = nn.ReLU() - self.fc2 = nn.Conv2D(channels // reduction, channels, kernel_size=1, padding=0, bias_attr=False) + self.fc2 = nn.Conv2D(channels // reduction, + channels, + kernel_size=1, + padding=0, + bias_attr=False) self.sigmoid = nn.Sigmoid() def forward(self, x): @@ -98,13 +113,13 @@ def __init__(self, in_channel, depth, stride): else: self.shortcut_layer = nn.Sequential( nn.Conv2D(in_channel, depth, (1, 1), stride, bias_attr=False), - nn.BatchNorm2D(depth) - ) + nn.BatchNorm2D(depth)) self.res_layer = nn.Sequential( nn.BatchNorm2D(in_channel), - nn.Conv2D(in_channel, depth, (3, 3), (1, 1), 1, bias_attr=False), nn.PReLU(depth), - nn.Conv2D(depth, depth, (3, 3), stride, 1, bias_attr=False), nn.BatchNorm2D(depth) - ) + nn.Conv2D(in_channel, depth, (3, 3), (1, 1), 1, bias_attr=False), + nn.PReLU(depth), + nn.Conv2D(depth, depth, (3, 3), stride, 1, bias_attr=False), + nn.BatchNorm2D(depth)) def forward(self, x): shortcut = self.shortcut_layer(x) @@ -120,16 +135,13 @@ def __init__(self, in_channel, depth, stride): else: self.shortcut_layer = nn.Sequential( nn.Conv2D(in_channel, depth, (1, 1), stride, bias_attr=False), - nn.BatchNorm2D(depth) - ) + nn.BatchNorm2D(depth)) self.res_layer = nn.Sequential( nn.BatchNorm2D(in_channel), nn.Conv2D(in_channel, depth, (3, 3), (1, 1), 1, bias_attr=False), nn.PReLU(depth), nn.Conv2D(depth, depth, (3, 3), stride, 1, bias_attr=False), - nn.BatchNorm2D(depth), - SEModule(depth, 16) - ) + nn.BatchNorm2D(depth), SEModule(depth, 16)) def 
forward(self, x): shortcut = self.shortcut_layer(x) @@ -144,8 +156,10 @@ def __init__(self, in_c, out_c, spatial): self.spatial = spatial num_pools = int(np.log2(spatial)) modules = [] - modules += [nn.Conv2D(in_c, out_c, kernel_size=3, stride=2, padding=1), - nn.LeakyReLU()] + modules += [ + nn.Conv2D(in_c, out_c, kernel_size=3, stride=2, padding=1), + nn.LeakyReLU() + ] for i in range(num_pools - 1): modules += [ nn.Conv2D(out_c, out_c, kernel_size=3, stride=2, padding=1), @@ -164,22 +178,23 @@ def forward(self, x): class GradualStyleEncoder(nn.Layer): def __init__(self, num_layers, mode='ir', opts=None): super(GradualStyleEncoder, self).__init__() - assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152' + assert num_layers in [50, 100, + 152], 'num_layers should be 50,100, or 152' assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se' blocks = get_blocks(num_layers) if mode == 'ir': unit_module = BottleneckIR elif mode == 'ir_se': unit_module = BottleneckIRSE - self.input_layer = nn.Sequential(nn.Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False), - nn.BatchNorm2D(64), - nn.PReLU(64)) + self.input_layer = nn.Sequential( + nn.Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False), + nn.BatchNorm2D(64), nn.PReLU(64)) modules = [] for block in blocks: for bottleneck in block: - modules.append(unit_module(bottleneck.in_channel, - bottleneck.depth, - bottleneck.stride)) + modules.append( + unit_module(bottleneck.in_channel, bottleneck.depth, + bottleneck.stride)) self.body = nn.Sequential(*modules) self.styles = nn.LayerList() @@ -214,7 +229,8 @@ def _upsample_add(self, x, y): So we choose bilinear upsample which supports arbitrary output sizes. ''' _, _, H, W = y.shape - return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=True) + y + return F.interpolate( + x, size=(H, W), mode='bilinear', align_corners=True) + y def forward(self, x): x = self.input_layer(x) @@ -249,24 +265,25 @@ class BackboneEncoderUsingLastLayerIntoW(nn.Layer): def __init__(self, num_layers, mode='ir', opts=None): super(BackboneEncoderUsingLastLayerIntoW, self).__init__() print('Using BackboneEncoderUsingLastLayerIntoW') - assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152' + assert num_layers in [50, 100, + 152], 'num_layers should be 50,100, or 152' assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se' blocks = get_blocks(num_layers) if mode == 'ir': unit_module = BottleneckIR elif mode == 'ir_se': unit_module = BottleneckIRSE - self.input_layer = nn.Sequential(nn.Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False), - nn.BatchNorm2D(64), - nn.PReLU(64)) + self.input_layer = nn.Sequential( + nn.Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False), + nn.BatchNorm2D(64), nn.PReLU(64)) self.output_pool = nn.AdaptiveAvgPool2D((1, 1)) self.linear = EqualLinear(512, 512, lr_mul=1) modules = [] for block in blocks: for bottleneck in block: - modules.append(unit_module(bottleneck.in_channel, - bottleneck.depth, - bottleneck.stride)) + modules.append( + unit_module(bottleneck.in_channel, bottleneck.depth, + bottleneck.stride)) self.body = nn.Sequential(*modules) def forward(self, x): @@ -282,16 +299,17 @@ class BackboneEncoderUsingLastLayerIntoWPlus(nn.Layer): def __init__(self, num_layers, mode='ir', opts=None): super(BackboneEncoderUsingLastLayerIntoWPlus, self).__init__() print('Using BackboneEncoderUsingLastLayerIntoWPlus') - assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152' + assert num_layers in [50, 100, + 
152], 'num_layers should be 50,100, or 152' assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se' blocks = get_blocks(num_layers) if mode == 'ir': unit_module = BottleneckIR elif mode == 'ir_se': unit_module = BottleneckIRSE - self.input_layer = nn.Sequential(nn.Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False), - nn.BatchNorm2D(64), - nn.PReLU(64)) + self.input_layer = nn.Sequential( + nn.Conv2D(opts.input_nc, 64, (3, 3), 1, 1, bias_attr=False), + nn.BatchNorm2D(64), nn.PReLU(64)) self.output_layer_2 = nn.Sequential(nn.BatchNorm2D(512), nn.AdaptiveAvgPool2D((7, 7)), Flatten(), @@ -300,9 +318,9 @@ def __init__(self, num_layers, mode='ir', opts=None): modules = [] for block in blocks: for bottleneck in block: - modules.append(unit_module(bottleneck.in_channel, - bottleneck.depth, - bottleneck.stride)) + modules.append( + unit_module(bottleneck.in_channel, bottleneck.depth, + bottleneck.stride)) self.body = nn.Sequential(*modules) def forward(self, x): @@ -321,15 +339,19 @@ def __init__(self, opts): self.set_opts(opts) # Define architecture self.encoder = self.set_encoder() - self.decoder = StyleGANv2Generator(opts.size, opts.style_dim, opts.n_mlp, opts.channel_multiplier) + self.decoder = StyleGANv2Generator(opts.size, opts.style_dim, + opts.n_mlp, opts.channel_multiplier) self.face_pool = nn.AdaptiveAvgPool2D((256, 256)) self.style_dim = self.decoder.style_dim self.n_latent = self.decoder.n_latent if self.opts.start_from_latent_avg: if self.opts.learn_in_w: - self.register_buffer('latent_avg', paddle.zeros([1, self.style_dim])) + self.register_buffer('latent_avg', + paddle.zeros([1, self.style_dim])) else: - self.register_buffer('latent_avg', paddle.zeros([1, self.n_latent, self.style_dim])) + self.register_buffer( + 'latent_avg', + paddle.zeros([1, self.n_latent, self.style_dim])) def set_encoder(self): if self.opts.encoder_type == 'GradualStyleEncoder': @@ -337,13 +359,22 @@ def set_encoder(self): elif self.opts.encoder_type == 'BackboneEncoderUsingLastLayerIntoW': encoder = BackboneEncoderUsingLastLayerIntoW(50, 'ir_se', self.opts) elif self.opts.encoder_type == 'BackboneEncoderUsingLastLayerIntoWPlus': - encoder = BackboneEncoderUsingLastLayerIntoWPlus(50, 'ir_se', self.opts) + encoder = BackboneEncoderUsingLastLayerIntoWPlus( + 50, 'ir_se', self.opts) else: - raise Exception('{} is not a valid encoders'.format(self.opts.encoder_type)) + raise Exception('{} is not a valid encoders'.format( + self.opts.encoder_type)) return encoder - def forward(self, x, resize=True, latent_mask=None, input_code=False, randomize_noise=True, - inject_latent=None, return_latents=False, alpha=None): + def forward(self, + x, + resize=True, + latent_mask=None, + input_code=False, + randomize_noise=True, + inject_latent=None, + return_latents=False, + alpha=None): if input_code: codes = x else: @@ -355,12 +386,12 @@ def forward(self, x, resize=True, latent_mask=None, input_code=False, randomize_ else: codes = codes + self.latent_avg.tile([codes.shape[0], 1, 1]) - if latent_mask is not None: for i in latent_mask: if inject_latent is not None: if alpha is not None: - codes[:, i] = alpha * inject_latent[:, i] + (1 - alpha) * codes[:, i] + codes[:, i] = alpha * inject_latent[:, i] + ( + 1 - alpha) * codes[:, i] else: codes[:, i] = inject_latent[:, i] else: diff --git a/ppgan/models/generators/generator_starganv2.py b/ppgan/models/generators/generator_starganv2.py index ad1aedbb42f12b..a8cef90cf88fcd 100755 --- a/ppgan/models/generators/generator_starganv2.py +++ 
b/ppgan/models/generators/generator_starganv2.py @@ -1,4 +1,6 @@ - +# code was heavily based on https://github.com/clovaai/stargan-v2 +# Users should be careful about adopting these functions in any commercial matters. +# https://github.com/clovaai/stargan-v2#license import paddle from paddle import nn import paddle.nn.functional as F @@ -13,25 +15,30 @@ FAN_WEIGHT_URL = "https://paddlegan.bj.bcebos.com/models/wing.pdparams" + class AvgPool2D(nn.Layer): """ - AvgPool2D + AvgPool2D Peplace avg_pool2d because paddle.grad will cause avg_pool2d to report an error when training. In the future Paddle framework will supports avg_pool2d and remove this class. """ def __init__(self): super(AvgPool2D, self).__init__() - self.filter = paddle.to_tensor([[1, 1], - [1, 1]], dtype='float32') + self.filter = paddle.to_tensor([[1, 1], [1, 1]], dtype='float32') def forward(self, x): - filter = self.filter.unsqueeze(0).unsqueeze(1).tile([x.shape[1], 1, 1, 1]) + filter = self.filter.unsqueeze(0).unsqueeze(1).tile( + [x.shape[1], 1, 1, 1]) return F.conv2d(x, filter, stride=2, padding=0, groups=x.shape[1]) / 4 class ResBlk(nn.Layer): - def __init__(self, dim_in, dim_out, actv=nn.LeakyReLU(0.2), - normalize=False, downsample=False): + def __init__(self, + dim_in, + dim_out, + actv=nn.LeakyReLU(0.2), + normalize=False, + downsample=False): super().__init__() self.actv = actv self.normalize = normalize @@ -43,8 +50,12 @@ def _build_weights(self, dim_in, dim_out): self.conv1 = nn.Conv2D(dim_in, dim_in, 3, 1, 1) self.conv2 = nn.Conv2D(dim_in, dim_out, 3, 1, 1) if self.normalize: - self.norm1 = nn.InstanceNorm2D(dim_in, weight_attr=True, bias_attr=True) - self.norm2 = nn.InstanceNorm2D(dim_in, weight_attr=True, bias_attr=True) + self.norm1 = nn.InstanceNorm2D(dim_in, + weight_attr=True, + bias_attr=True) + self.norm2 = nn.InstanceNorm2D(dim_in, + weight_attr=True, + bias_attr=True) if self.learned_sc: self.conv1x1 = nn.Conv2D(dim_in, dim_out, 1, 1, 0, bias_attr=False) @@ -76,8 +87,10 @@ def forward(self, x): class AdaIN(nn.Layer): def __init__(self, style_dim, num_features): super().__init__() - self.norm = nn.InstanceNorm2D(num_features, weight_attr=False, bias_attr=False) - self.fc = nn.Linear(style_dim, num_features*2) + self.norm = nn.InstanceNorm2D(num_features, + weight_attr=False, + bias_attr=False) + self.fc = nn.Linear(style_dim, num_features * 2) def forward(self, x, s): h = self.fc(s) @@ -88,8 +101,13 @@ def forward(self, x, s): class AdainResBlk(nn.Layer): - def __init__(self, dim_in, dim_out, style_dim=64, w_hpf=0, - actv=nn.LeakyReLU(0.2), upsample=False): + def __init__(self, + dim_in, + dim_out, + style_dim=64, + w_hpf=0, + actv=nn.LeakyReLU(0.2), + upsample=False): super().__init__() self.w_hpf = w_hpf self.actv = actv @@ -133,13 +151,13 @@ def forward(self, x, s): class HighPass(nn.Layer): def __init__(self, w_hpf): super(HighPass, self).__init__() - self.filter = paddle.to_tensor([[-1, -1, -1], - [-1, 8., -1], - [-1, -1, -1]]) / w_hpf + self.filter = paddle.to_tensor([[-1, -1, -1], [-1, 8., -1], + [-1, -1, -1]]) / w_hpf def forward(self, x): # filter = self.filter.unsqueeze(0).unsqueeze(1).repeat(x.size(1), 1, 1, 1) - filter = self.filter.unsqueeze(0).unsqueeze(1).tile([x.shape[1], 1, 1, 1]) + filter = self.filter.unsqueeze(0).unsqueeze(1).tile( + [x.shape[1], 1, 1, 1]) return F.conv2d(x, filter, padding=1, groups=x.shape[1]) @@ -154,30 +172,35 @@ def __init__(self, img_size=256, style_dim=64, max_conv_dim=512, w_hpf=1): self.decode = nn.LayerList() self.to_rgb = nn.Sequential( 
nn.InstanceNorm2D(dim_in, weight_attr=True, bias_attr=True), - nn.LeakyReLU(0.2), - nn.Conv2D(dim_in, 3, 1, 1, 0)) + nn.LeakyReLU(0.2), nn.Conv2D(dim_in, 3, 1, 1, 0)) # down/up-sampling blocks repeat_num = int(np.log2(img_size)) - 4 if w_hpf > 0: repeat_num += 1 for _ in range(repeat_num): - dim_out = min(dim_in*2, max_conv_dim) + dim_out = min(dim_in * 2, max_conv_dim) self.encode.append( ResBlk(dim_in, dim_out, normalize=True, downsample=True)) if len(self.decode) == 0: - self.decode.append(AdainResBlk(dim_out, dim_in, style_dim, - w_hpf=w_hpf, upsample=True)) + self.decode.append( + AdainResBlk(dim_out, + dim_in, + style_dim, + w_hpf=w_hpf, + upsample=True)) else: - self.decode.insert( - 0, AdainResBlk(dim_out, dim_in, style_dim, - w_hpf=w_hpf, upsample=True)) # stack-like + self.decode.insert(0, + AdainResBlk(dim_out, + dim_in, + style_dim, + w_hpf=w_hpf, + upsample=True)) # stack-like dim_in = dim_out # bottleneck blocks for _ in range(2): - self.encode.append( - ResBlk(dim_out, dim_out, normalize=True)) + self.encode.append(ResBlk(dim_out, dim_out, normalize=True)) self.decode.insert( 0, AdainResBlk(dim_out, dim_out, style_dim, w_hpf=w_hpf)) @@ -195,7 +218,9 @@ def forward(self, x, s, masks=None): x = block(x, s) if (masks is not None) and (x.shape[2] in [32, 64, 128]): mask = masks[0] if x.shape[2] in [32] else masks[1] - mask = F.interpolate(mask, size=[x.shape[2], x.shape[2]], mode='bilinear') + mask = F.interpolate(mask, + size=[x.shape[2], x.shape[2]], + mode='bilinear') x = x + self.hpf(mask * cache[x.shape[2]]) return self.to_rgb(x) @@ -214,13 +239,11 @@ def __init__(self, latent_dim=16, style_dim=64, num_domains=2): self.unshared = nn.LayerList() for _ in range(num_domains): - self.unshared.append(nn.Sequential(nn.Linear(512, 512), - nn.ReLU(), - nn.Linear(512, 512), - nn.ReLU(), - nn.Linear(512, 512), - nn.ReLU(), - nn.Linear(512, style_dim))) + self.unshared.append( + nn.Sequential(nn.Linear(512, 512), + nn.ReLU(), nn.Linear(512, 512), nn.ReLU(), + nn.Linear(512, 512), nn.ReLU(), + nn.Linear(512, style_dim))) def forward(self, z, y): h = self.shared(z) @@ -231,7 +254,10 @@ def forward(self, z, y): idx = paddle.to_tensor(np.array(range(y.shape[0]))).astype('int') s = [] for i in range(idx.shape[0]): - s += [out[idx[i].numpy().astype(np.int).tolist()[0], y[i].numpy().astype(np.int).tolist()[0]]] + s += [ + out[idx[i].numpy().astype(np.int).tolist()[0], + y[i].numpy().astype(np.int).tolist()[0]] + ] s = paddle.stack(s) s = paddle.reshape(s, (s.shape[0], -1)) return s @@ -239,7 +265,11 @@ def forward(self, z, y): @GENERATORS.register() class StarGANv2Style(nn.Layer): - def __init__(self, img_size=256, style_dim=64, num_domains=2, max_conv_dim=512): + def __init__(self, + img_size=256, + style_dim=64, + num_domains=2, + max_conv_dim=512): super().__init__() dim_in = 2**14 // img_size blocks = [] @@ -247,7 +277,7 @@ def __init__(self, img_size=256, style_dim=64, num_domains=2, max_conv_dim=512): repeat_num = int(np.log2(img_size)) - 2 for _ in range(repeat_num): - dim_out = min(dim_in*2, max_conv_dim) + dim_out = min(dim_in * 2, max_conv_dim) blocks += [ResBlk(dim_in, dim_out, downsample=True)] dim_in = dim_out @@ -270,7 +300,10 @@ def forward(self, x, y): idx = paddle.to_tensor(np.array(range(y.shape[0]))).astype('int') s = [] for i in range(idx.shape[0]): - s += [out[idx[i].numpy().astype(np.int).tolist()[0], y[i].numpy().astype(np.int).tolist()[0]]] + s += [ + out[idx[i].numpy().astype(np.int).tolist()[0], + y[i].numpy().astype(np.int).tolist()[0]] + ] s = paddle.stack(s) s = 
paddle.reshape(s, (s.shape[0], -1)) return s @@ -278,15 +311,25 @@ def forward(self, x, y): @GENERATORS.register() class FAN(nn.Layer): - def __init__(self, num_modules=1, end_relu=False, num_landmarks=98, fname_pretrained=None): + def __init__(self, + num_modules=1, + end_relu=False, + num_landmarks=98, + fname_pretrained=None): super(FAN, self).__init__() self.num_modules = num_modules self.end_relu = end_relu # Base part - self.conv1 = CoordConvTh(256, 256, True, False, - in_channels=3, out_channels=64, - kernel_size=7, stride=2, padding=3) + self.conv1 = CoordConvTh(256, + 256, + True, + False, + in_channels=3, + out_channels=64, + kernel_size=7, + stride=2, + padding=3) self.bn1 = nn.BatchNorm2D(64) self.conv2 = ConvBlock(64, 128) self.conv3 = ConvBlock(128, 128) @@ -297,7 +340,7 @@ def __init__(self, num_modules=1, end_relu=False, num_landmarks=98, fname_pretra self.add_sublayer('top_m_0', ConvBlock(256, 256)) self.add_sublayer('conv_last0', nn.Conv2D(256, 256, 1, 1, 0)) self.add_sublayer('bn_end0', nn.BatchNorm2D(256)) - self.add_sublayer('l0', nn.Conv2D(256, num_landmarks+1, 1, 1, 0)) + self.add_sublayer('l0', nn.Conv2D(256, num_landmarks + 1, 1, 1, 0)) if fname_pretrained is not None: self.load_pretrained_weights(fname_pretrained) @@ -312,10 +355,12 @@ def load_pretrained_weights(self, fname): with open(fname, 'rb') as f: checkpoint = pickle.load(f) if six.PY2 else pickle.load( f, encoding='latin1') - + model_weights = self.state_dict() - model_weights.update({k: v for k, v in checkpoint['state_dict'].items() - if k in model_weights}) + model_weights.update({ + k: v + for k, v in checkpoint['state_dict'].items() if k in model_weights + }) self.set_state_dict(model_weights) def forward(self, x): @@ -330,8 +375,9 @@ def forward(self, x): tmp_out = None ll, boundary_channel = self._sub_layers['m0'](x, tmp_out) ll = self._sub_layers['top_m_0'](ll) - ll = F.relu(self._sub_layers['bn_end0'] - (self._sub_layers['conv_last0'](ll)), True) + ll = F.relu( + self._sub_layers['bn_end0'](self._sub_layers['conv_last0'](ll)), + True) # Predict heatmaps tmp_out = self._sub_layers['l0'](ll) @@ -345,12 +391,14 @@ def forward(self, x): def get_heatmap(self, x, b_preprocess=True): ''' outputs 0-1 normalized heatmap ''' x = F.interpolate(x, size=[256, 256], mode='bilinear') - x_01 = x*0.5 + 0.5 + x_01 = x * 0.5 + 0.5 outputs, _ = self(x_01) heatmaps = outputs[-1][:, :-1, :, :] scale_factor = x.shape[2] // heatmaps.shape[2] if b_preprocess: - heatmaps = F.interpolate(heatmaps, scale_factor=scale_factor, - mode='bilinear', align_corners=True) + heatmaps = F.interpolate(heatmaps, + scale_factor=scale_factor, + mode='bilinear', + align_corners=True) heatmaps = preprocess(heatmaps) return heatmaps diff --git a/ppgan/models/generators/generator_styleganv2.py b/ppgan/models/generators/generator_styleganv2.py index 72a6c0a3366cf8..6297a3ea8f949d 100644 --- a/ppgan/models/generators/generator_styleganv2.py +++ b/ppgan/models/generators/generator_styleganv2.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# code was heavily based on https://github.com/rosinality/stylegan2-pytorch +# MIT License +# Copyright (c) 2019 Kim Seonghyeon + import math import random import paddle @@ -147,7 +151,7 @@ def forward(self, image, noise=None): if noise is None: batch, _, height, width = image.shape noise = paddle.randn((batch, 1, height, width)) - if self.is_concat: + if self.is_concat: return paddle.concat([image, self.weight * noise], axis=1) else: return image + self.weight * noise @@ -169,17 +173,15 @@ def forward(self, input): class StyledConv(nn.Layer): - def __init__( - self, - in_channel, - out_channel, - kernel_size, - style_dim, - upsample=False, - blur_kernel=[1, 3, 3, 1], - demodulate=True, - is_concat=False - ): + def __init__(self, + in_channel, + out_channel, + kernel_size, + style_dim, + upsample=False, + blur_kernel=[1, 3, 3, 1], + demodulate=True, + is_concat=False): super().__init__() self.conv = ModulatedConv2D( @@ -193,7 +195,8 @@ def __init__( ) self.noise = NoiseInjection(is_concat=is_concat) - self.activate = FusedLeakyReLU(out_channel*2 if is_concat else out_channel) + self.activate = FusedLeakyReLU(out_channel * + 2 if is_concat else out_channel) def forward(self, input, style, noise=None): out = self.conv(input, style) @@ -236,16 +239,14 @@ def forward(self, input, style, skip=None): @GENERATORS.register() class StyleGANv2Generator(nn.Layer): - def __init__( - self, - size, - style_dim, - n_mlp, - channel_multiplier=2, - blur_kernel=[1, 3, 3, 1], - lr_mlp=0.01, - is_concat=False - ): + def __init__(self, + size, + style_dim, + n_mlp, + channel_multiplier=2, + blur_kernel=[1, 3, 3, 1], + lr_mlp=0.01, + is_concat=False): super().__init__() self.size = size @@ -282,7 +283,10 @@ def __init__( style_dim, blur_kernel=blur_kernel, is_concat=is_concat) - self.to_rgb1 = ToRGB(self.channels[4]*2 if is_concat else self.channels[4], style_dim, upsample=False) + self.to_rgb1 = ToRGB(self.channels[4] * + 2 if is_concat else self.channels[4], + style_dim, + upsample=False) self.log_size = int(math.log(size, 2)) self.num_layers = (self.log_size - 2) * 2 + 1 @@ -305,7 +309,7 @@ def __init__( self.convs.append( StyledConv( - in_channel*2 if is_concat else in_channel, + in_channel * 2 if is_concat else in_channel, out_channel, 3, style_dim, @@ -315,14 +319,15 @@ def __init__( )) self.convs.append( - StyledConv(out_channel*2 if is_concat else out_channel, + StyledConv(out_channel * 2 if is_concat else out_channel, out_channel, 3, style_dim, blur_kernel=blur_kernel, is_concat=is_concat)) - self.to_rgbs.append(ToRGB(out_channel*2 if is_concat else out_channel, style_dim)) + self.to_rgbs.append( + ToRGB(out_channel * 2 if is_concat else out_channel, style_dim)) in_channel = out_channel @@ -408,20 +413,21 @@ def forward( noise_i = 1 outs = [] - for conv1, conv2, to_rgb in zip( - self.convs[::2], self.convs[1::2], self.to_rgbs): - out = conv1(out, latent[:, i], noise=noise[(noise_i + 1)//2]) ### 1 for 2 - out = conv2(out, latent[:, i + 1], noise=noise[(noise_i + 2)//2]) ### 1 for 2 + for conv1, conv2, to_rgb in zip(self.convs[::2], self.convs[1::2], + self.to_rgbs): + out = conv1(out, latent[:, i], + noise=noise[(noise_i + 1) // 2]) ### 1 for 2 + out = conv2(out, + latent[:, i + 1], + noise=noise[(noise_i + 2) // 2]) ### 1 for 2 skip = to_rgb(out, latent[:, i + 2], skip) - + i += 2 noise_i += 2 - else: - for conv1, conv2, noise1, noise2, to_rgb in zip(self.convs[::2], - self.convs[1::2], - noise[1::2], - noise[2::2], - self.to_rgbs): + else: + for conv1, conv2, noise1, noise2, to_rgb in zip( + 
self.convs[::2], self.convs[1::2], noise[1::2], noise[2::2], + self.to_rgbs): out = conv1(out, latent[:, i], noise=noise1) out = conv2(out, latent[:, i + 1], noise=noise2) skip = to_rgb(out, latent[:, i + 2], skip) diff --git a/ppgan/models/generators/resnet_ugatit_p2c.py b/ppgan/models/generators/resnet_ugatit_p2c.py index 865fd9ca5d8ea0..e7874c8bccfb97 100644 --- a/ppgan/models/generators/resnet_ugatit_p2c.py +++ b/ppgan/models/generators/resnet_ugatit_p2c.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +# code was heavily based on https://github.com/znxlwm/UGATIT-pytorch +# MIT License +# Copyright (c) 2019 Hyeonwoo Kang import paddle import paddle.nn as nn @@ -45,72 +48,82 @@ def __init__(self, nn.ReLU() ] - DownBlock += [ - HourGlass(ngf, ngf), - HourGlass(ngf, ngf) - ] + DownBlock += [HourGlass(ngf, ngf), HourGlass(ngf, ngf)] # Down-Sampling n_downsampling = 2 for i in range(n_downsampling): - mult = 2 ** i + mult = 2**i DownBlock += [ nn.Pad2D([1, 1, 1, 1], 'reflect'), - nn.Conv2D(ngf*mult, ngf*mult*2, kernel_size=3, stride=2, bias_attr=False), - nn.InstanceNorm2D(ngf*mult*2, weight_attr=False, bias_attr=False), + nn.Conv2D(ngf * mult, + ngf * mult * 2, + kernel_size=3, + stride=2, + bias_attr=False), + nn.InstanceNorm2D(ngf * mult * 2, + weight_attr=False, + bias_attr=False), nn.ReLU() ] # Encoder Bottleneck - mult = 2 ** n_downsampling + mult = 2**n_downsampling for i in range(n_blocks): - setattr(self, 'EncodeBlock'+str(i+1), ResnetBlock(ngf*mult)) + setattr(self, 'EncodeBlock' + str(i + 1), ResnetBlock(ngf * mult)) # Class Activation Map - self.gap_fc = nn.Linear(ngf*mult, 1, bias_attr=False) - self.gmp_fc = nn.Linear(ngf*mult, 1, bias_attr=False) - self.conv1x1 = nn.Conv2D(ngf*mult*2, ngf*mult, kernel_size=1, stride=1) + self.gap_fc = nn.Linear(ngf * mult, 1, bias_attr=False) + self.gmp_fc = nn.Linear(ngf * mult, 1, bias_attr=False) + self.conv1x1 = nn.Conv2D(ngf * mult * 2, + ngf * mult, + kernel_size=1, + stride=1) self.relu = nn.ReLU() # Gamma, Beta block FC = [] if self.light: FC += [ - nn.Linear(ngf*mult, ngf*mult, bias_attr=False), + nn.Linear(ngf * mult, ngf * mult, bias_attr=False), nn.ReLU(), - nn.Linear(ngf*mult, ngf*mult, bias_attr=False), + nn.Linear(ngf * mult, ngf * mult, bias_attr=False), nn.ReLU() ] else: FC += [ - nn.Linear(img_size//mult*img_size//mult*ngf*mult, ngf*mult, bias_attr=False), + nn.Linear(img_size // mult * img_size // mult * ngf * mult, + ngf * mult, + bias_attr=False), nn.ReLU(), - nn.Linear(ngf*mult, ngf*mult, bias_attr=False), + nn.Linear(ngf * mult, ngf * mult, bias_attr=False), nn.ReLU() ] # Decoder Bottleneck - mult = 2 ** n_downsampling + mult = 2**n_downsampling for i in range(n_blocks): - setattr(self, 'DecodeBlock'+str(i + 1), ResnetSoftAdaLINBlock(ngf*mult)) + setattr(self, 'DecodeBlock' + str(i + 1), + ResnetSoftAdaLINBlock(ngf * mult)) # Up-Sampling UpBlock = [] for i in range(n_downsampling): - mult = 2 ** (n_downsampling - i) + mult = 2**(n_downsampling - i) UpBlock += [ nn.Upsample(scale_factor=2), nn.Pad2D([1, 1, 1, 1], 'reflect'), - nn.Conv2D(ngf*mult, ngf*mult//2, kernel_size=3, stride=1, bias_attr=False), - LIN(ngf*mult//2), + nn.Conv2D(ngf * mult, + ngf * mult // 2, + kernel_size=3, + stride=1, + bias_attr=False), + LIN(ngf * mult // 2), nn.ReLU() ] - UpBlock += [ - HourGlass(ngf, ngf), - HourGlass(ngf, ngf, False) - ] + UpBlock += [HourGlass(ngf, ngf), HourGlass(ngf, ngf, False)] UpBlock += [ nn.Pad2D([3, 3, 3, 3], 'reflect'), @@ -129,8 +142,9 @@ def 
forward(self, x): content_features = [] for i in range(self.n_blocks): - x = getattr(self, 'EncodeBlock'+str(i+1))(x) - content_features.append(F.adaptive_avg_pool2d(x, 1).reshape([bs, -1])) + x = getattr(self, 'EncodeBlock' + str(i + 1))(x) + content_features.append( + F.adaptive_avg_pool2d(x, 1).reshape([bs, -1])) gap = F.adaptive_avg_pool2d(x, 1) gap_logit = self.gap_fc(gap.reshape([bs, -1])) @@ -155,7 +169,10 @@ def forward(self, x): style_features = self.FC(x.reshape([bs, -1])) for i in range(self.n_blocks): - x = getattr(self, 'DecodeBlock'+str(i+1))(x, content_features[4-i-1], style_features) + x = getattr(self, + 'DecodeBlock' + str(i + 1))(x, + content_features[4 - i - 1], + style_features) out = self.UpBlock(x) @@ -168,25 +185,27 @@ def __init__(self, dim_in, dim_out): self.dim_in = dim_in self.dim_out = dim_out - self.conv_block1 = self.__convblock(dim_in, dim_out//2) - self.conv_block2 = self.__convblock(dim_out//2, dim_out//4) - self.conv_block3 = self.__convblock(dim_out//4, dim_out//4) + self.conv_block1 = self.__convblock(dim_in, dim_out // 2) + self.conv_block2 = self.__convblock(dim_out // 2, dim_out // 4) + self.conv_block3 = self.__convblock(dim_out // 4, dim_out // 4) if self.dim_in != self.dim_out: self.conv_skip = nn.Sequential( nn.InstanceNorm2D(dim_in, weight_attr=False, bias_attr=False), nn.ReLU(), - nn.Conv2D(dim_in, dim_out, kernel_size=1, stride=1, bias_attr=False) - ) + nn.Conv2D(dim_in, + dim_out, + kernel_size=1, + stride=1, + bias_attr=False)) @staticmethod def __convblock(dim_in, dim_out): return nn.Sequential( nn.InstanceNorm2D(dim_in, weight_attr=False, bias_attr=False), - nn.ReLU(), - nn.Pad2D([1, 1, 1, 1], 'reflect'), - nn.Conv2D(dim_in, dim_out, kernel_size=3, stride=1, bias_attr=False) - ) + nn.ReLU(), nn.Pad2D([1, 1, 1, 1], 'reflect'), + nn.Conv2D(dim_in, dim_out, kernel_size=3, stride=1, + bias_attr=False)) def forward(self, x): residual = x @@ -210,24 +229,25 @@ def __init__(self, dim_in): self.n_block = 9 for i in range(self.n_skip): - setattr(self, 'ConvBlockskip'+str(i+1), ConvBlock(dim_in, dim_in)) + setattr(self, 'ConvBlockskip' + str(i + 1), + ConvBlock(dim_in, dim_in)) for i in range(self.n_block): - setattr(self, 'ConvBlock'+str(i+1), ConvBlock(dim_in, dim_in)) + setattr(self, 'ConvBlock' + str(i + 1), ConvBlock(dim_in, dim_in)) def forward(self, x): skips = [] for i in range(self.n_skip): - skips.append(getattr(self, 'ConvBlockskip'+str(i+1))(x)) + skips.append(getattr(self, 'ConvBlockskip' + str(i + 1))(x)) x = F.avg_pool2d(x, 2) - x = getattr(self, 'ConvBlock'+str(i+1))(x) + x = getattr(self, 'ConvBlock' + str(i + 1))(x) x = self.ConvBlock5(x) for i in range(self.n_skip): - x = getattr(self, 'ConvBlock'+str(i+6))(x) + x = getattr(self, 'ConvBlock' + str(i + 6))(x) x = F.upsample(x, scale_factor=2) - x = skips[self.n_skip-i-1] + x + x = skips[self.n_skip - i - 1] + x return x @@ -238,12 +258,14 @@ def __init__(self, dim_in, dim_out, use_res=True): self.use_res = use_res self.HG = nn.Sequential( - HourGlassBlock(dim_in), - ConvBlock(dim_out, dim_out), - nn.Conv2D(dim_out, dim_out, kernel_size=1, stride=1, bias_attr=False), + HourGlassBlock(dim_in), ConvBlock(dim_out, dim_out), + nn.Conv2D(dim_out, + dim_out, + kernel_size=1, + stride=1, + bias_attr=False), nn.InstanceNorm2D(dim_out, weight_attr=False, bias_attr=False), - nn.ReLU() - ) + nn.ReLU()) self.Conv1 = nn.Conv2D(dim_out, 3, kernel_size=1, stride=1) @@ -292,12 +314,20 @@ class ResnetSoftAdaLINBlock(nn.Layer): def __init__(self, dim, use_bias=False): super(ResnetSoftAdaLINBlock, 
self).__init__()
         self.pad1 = nn.Pad2D([1, 1, 1, 1], 'reflect')
-        self.conv1 = nn.Conv2D(dim, dim, kernel_size=3, stride=1, bias_attr=use_bias)
+        self.conv1 = nn.Conv2D(dim,
+                               dim,
+                               kernel_size=3,
+                               stride=1,
+                               bias_attr=use_bias)
         self.norm1 = SoftAdaLIN(dim)
         self.relu1 = nn.ReLU()
 
         self.pad2 = nn.Pad2D([1, 1, 1, 1], 'reflect')
-        self.conv2 = nn.Conv2D(dim, dim, kernel_size=3, stride=1, bias_attr=use_bias)
+        self.conv2 = nn.Conv2D(dim,
+                               dim,
+                               kernel_size=3,
+                               stride=1,
+                               bias_attr=use_bias)
         self.norm2 = SoftAdaLIN(dim)
 
     def forward(self, x, content_features, style_features):
@@ -317,23 +347,28 @@ def __init__(self, num_features, eps=1e-5):
         super(SoftAdaLIN, self).__init__()
         self.norm = AdaLIN(num_features, eps)
 
-        self.w_gamma = self.create_parameter([1, num_features], default_initializer=nn.initializer.Constant(0.))
-        self.w_beta = self.create_parameter([1, num_features], default_initializer=nn.initializer.Constant(0.))
-
-        self.c_gamma = nn.Sequential(nn.Linear(num_features, num_features, bias_attr=False),
-                                     nn.ReLU(),
-                                     nn.Linear(num_features, num_features, bias_attr=False))
-        self.c_beta = nn.Sequential(nn.Linear(num_features, num_features, bias_attr=False),
-                                    nn.ReLU(),
-                                    nn.Linear(num_features, num_features, bias_attr=False))
+        self.w_gamma = self.create_parameter(
+            [1, num_features], default_initializer=nn.initializer.Constant(0.))
+        self.w_beta = self.create_parameter(
+            [1, num_features], default_initializer=nn.initializer.Constant(0.))
+
+        self.c_gamma = nn.Sequential(
+            nn.Linear(num_features, num_features, bias_attr=False), nn.ReLU(),
+            nn.Linear(num_features, num_features, bias_attr=False))
+        self.c_beta = nn.Sequential(
+            nn.Linear(num_features, num_features, bias_attr=False), nn.ReLU(),
+            nn.Linear(num_features, num_features, bias_attr=False))
         self.s_gamma = nn.Linear(num_features, num_features, bias_attr=False)
         self.s_beta = nn.Linear(num_features, num_features, bias_attr=False)
 
     def forward(self, x, content_features, style_features):
-        content_gamma, content_beta = self.c_gamma(content_features), self.c_beta(content_features)
-        style_gamma, style_beta = self.s_gamma(style_features), self.s_beta(style_features)
+        content_gamma, content_beta = self.c_gamma(
+            content_features), self.c_beta(content_features)
+        style_gamma, style_beta = self.s_gamma(style_features), self.s_beta(
+            style_features)
 
-        w_gamma_, w_beta_ = self.w_gamma.expand([x.shape[0], -1]), self.w_beta.expand([x.shape[0], -1])
+        w_gamma_, w_beta_ = self.w_gamma.expand(
+            [x.shape[0], -1]), self.w_beta.expand([x.shape[0], -1])
         soft_gamma = (1. - w_gamma_) * style_gamma + w_gamma_ * content_gamma
         soft_beta = (1. - w_beta_) * style_beta + w_beta_ * content_beta
@@ -345,16 +380,25 @@ class AdaLIN(nn.Layer):
     def __init__(self, num_features, eps=1e-5):
         super(AdaLIN, self).__init__()
         self.eps = eps
-        self.rho = self.create_parameter([1, num_features, 1, 1], default_initializer=nn.initializer.Constant(0.9))
+        self.rho = self.create_parameter(
+            [1, num_features, 1, 1],
+            default_initializer=nn.initializer.Constant(0.9))
 
     def forward(self, x, gamma, beta):
-        in_mean, in_var = paddle.mean(x, axis=[2, 3], keepdim=True), paddle.var(x, axis=[2, 3], keepdim=True)
+        in_mean, in_var = paddle.mean(x, axis=[2, 3],
+                                      keepdim=True), paddle.var(x,
+                                                                axis=[2, 3],
+                                                                keepdim=True)
         out_in = (x - in_mean) / paddle.sqrt(in_var + self.eps)
-        ln_mean, ln_var = paddle.mean(x, axis=[1, 2, 3], keepdim=True), paddle.var(x, axis=[1, 2, 3], keepdim=True)
+        ln_mean, ln_var = paddle.mean(x, axis=[1, 2, 3],
+                                      keepdim=True), paddle.var(x,
+                                                                axis=[1, 2, 3],
+                                                                keepdim=True)
         out_ln = (x - ln_mean) / paddle.sqrt(ln_var + self.eps)
         out = self.rho.expand([x.shape[0], -1, -1, -1]) * out_in + \
               (1-self.rho.expand([x.shape[0], -1, -1, -1])) * out_ln
-        out = out * gamma.unsqueeze(2).unsqueeze(3) + beta.unsqueeze(2).unsqueeze(3)
+        out = out * gamma.unsqueeze(2).unsqueeze(3) + beta.unsqueeze(
+            2).unsqueeze(3)
 
         return out
 
@@ -363,17 +407,31 @@ class LIN(nn.Layer):
     def __init__(self, num_features, eps=1e-5):
         super(LIN, self).__init__()
         self.eps = eps
-        self.rho = self.create_parameter([1, num_features, 1, 1], default_initializer=nn.initializer.Constant(0.))
-        self.gamma = self.create_parameter([1, num_features, 1, 1], default_initializer=nn.initializer.Constant(1.))
-        self.beta = self.create_parameter([1, num_features, 1, 1], default_initializer=nn.initializer.Constant(0.))
+        self.rho = self.create_parameter(
+            [1, num_features, 1, 1],
+            default_initializer=nn.initializer.Constant(0.))
+        self.gamma = self.create_parameter(
+            [1, num_features, 1, 1],
+            default_initializer=nn.initializer.Constant(1.))
+        self.beta = self.create_parameter(
+            [1, num_features, 1, 1],
+            default_initializer=nn.initializer.Constant(0.))
 
     def forward(self, x):
-        in_mean, in_var = paddle.mean(x, axis=[2, 3], keepdim=True), paddle.var(x, axis=[2, 3], keepdim=True)
+        in_mean, in_var = paddle.mean(x, axis=[2, 3],
+                                      keepdim=True), paddle.var(x,
+                                                                axis=[2, 3],
+                                                                keepdim=True)
         out_in = (x - in_mean) / paddle.sqrt(in_var + self.eps)
-        ln_mean, ln_var = paddle.mean(x, axis=[1, 2, 3], keepdim=True), paddle.var(x, axis=[1, 2, 3], keepdim=True)
+        ln_mean, ln_var = paddle.mean(x, axis=[1, 2, 3],
+                                      keepdim=True), paddle.var(x,
+                                                                axis=[1, 2, 3],
+                                                                keepdim=True)
         out_ln = (x - ln_mean) / paddle.sqrt(ln_var + self.eps)
         out = self.rho.expand([x.shape[0], -1, -1, -1]) * out_in + \
               (1-self.rho.expand([x.shape[0], -1, -1, -1])) * out_ln
-        out = out * self.gamma.expand([x.shape[0], -1, -1, -1]) + self.beta.expand([x.shape[0], -1, -1, -1])
+        out = out * self.gamma.expand([x.shape[0], -1, -1, -1
+                                       ]) + self.beta.expand(
+                                           [x.shape[0], -1, -1, -1])
 
         return out
diff --git a/ppgan/models/generators/wav2lip.py b/ppgan/models/generators/wav2lip.py
index 7c62f783462f82..5c8b0c9438a081 100644
--- a/ppgan/models/generators/wav2lip.py
+++ b/ppgan/models/generators/wav2lip.py
@@ -18,9 +18,9 @@ def __init__(self):
         self.face_encoder_blocks = nn.LayerList([
             nn.Sequential(ConvBNRelu(6, 16, kernel_size=7, stride=1,
-                                     padding=3)),  # 96,96
+                                     padding=3)),
             nn.Sequential(
-                ConvBNRelu(16, 32, kernel_size=3, stride=2, padding=1),  # 48,48
+                ConvBNRelu(16, 32, kernel_size=3, stride=2, padding=1),
                 ConvBNRelu(32,
                            32,
                            kernel_size=3,
@@ -34,7 +34,7 @@ def __init__(self):
                            padding=1,
                            residual=True)),
             nn.Sequential(
-                ConvBNRelu(32, 64, kernel_size=3, stride=2, padding=1),  # 24,24
+                ConvBNRelu(32, 64, kernel_size=3, stride=2, padding=1),
                 ConvBNRelu(64,
                            64,
                            kernel_size=3,
@@ -54,8 +54,7 @@ def __init__(self):
                            padding=1,
                            residual=True)),
             nn.Sequential(
-                ConvBNRelu(64, 128, kernel_size=3, stride=2,
-                           padding=1),  # 12,12
+                ConvBNRelu(64, 128, kernel_size=3, stride=2, padding=1),
                 ConvBNRelu(128,
                            128,
                            kernel_size=3,
@@ -69,7 +68,7 @@ def __init__(self):
                            padding=1,
                            residual=True)),
             nn.Sequential(
-                ConvBNRelu(128, 256, kernel_size=3, stride=2, padding=1),  # 6,6
+                ConvBNRelu(128, 256, kernel_size=3, stride=2, padding=1),
                 ConvBNRelu(256,
                            256,
                            kernel_size=3,
@@ -83,7 +82,7 @@ def __init__(self):
                            padding=1,
                            residual=True)),
             nn.Sequential(
-                ConvBNRelu(256, 512, kernel_size=3, stride=2, padding=1),  # 3,3
+                ConvBNRelu(256, 512, kernel_size=3, stride=2, padding=1),
                 ConvBNRelu(512,
                            512,
                            kernel_size=3,
@@ -92,8 +91,7 @@ def __init__(self):
                            residual=True),
             ),
             nn.Sequential(
-                ConvBNRelu(512, 512, kernel_size=3, stride=1,
-                           padding=0),  # 1, 1
+                ConvBNRelu(512, 512, kernel_size=3, stride=1, padding=0),
                 ConvBNRelu(512, 512, kernel_size=1, stride=1, padding=0)),
         ])
@@ -156,7 +154,7 @@ def __init__(self):
                        512,
                        kernel_size=3,
                        stride=1,
-                       padding=0),  # 3,3
+                       padding=0),
             ConvBNRelu(512,
                        512,
                        kernel_size=3,
@@ -183,7 +181,7 @@ def __init__(self):
                        stride=1,
                        padding=1,
                        residual=True),
-            ),  # 6, 6
+            ),
             nn.Sequential(
                 Conv2dTransposeRelu(768,
                                     384,
@@ -203,7 +201,7 @@ def __init__(self):
                        stride=1,
                        padding=1,
                        residual=True),
-            ),  # 12, 12
+            ),
             nn.Sequential(
                 Conv2dTransposeRelu(512,
                                     256,
@@ -223,7 +221,7 @@ def __init__(self):
                        stride=1,
                        padding=1,
                        residual=True),
-            ),  # 24, 24
+            ),
             nn.Sequential(
                 Conv2dTransposeRelu(320,
                                     128,
@@ -243,7 +241,7 @@ def __init__(self):
                        stride=1,
                        padding=1,
                        residual=True),
-            ),  # 48, 48
+            ),
             nn.Sequential(
                 Conv2dTransposeRelu(160,
                                     64,
@@ -264,14 +262,13 @@ def __init__(self):
                            padding=1,
                            residual=True),
             ),
-        ])  # 96,96
+        ])
 
         self.output_block = nn.Sequential(
             ConvBNRelu(80, 32, kernel_size=3, stride=1, padding=1),
             nn.Conv2D(32, 3, kernel_size=1, stride=1, padding=0), nn.Sigmoid())
 
     def forward(self, audio_sequences, face_sequences):
-        # audio_sequences = (B, T, 1, 80, 16)
         B = audio_sequences.shape[0]
 
         input_dim_size = len(face_sequences.shape)
@@ -285,7 +282,7 @@ def forward(self, audio_sequences, face_sequences):
         ],
                                             axis=0)
 
-        audio_embedding = self.audio_encoder(audio_sequences)  # B, 512, 1, 1
+        audio_embedding = self.audio_encoder(audio_sequences)
 
         feats = []
         x = face_sequences
@@ -308,8 +305,8 @@ def forward(self, audio_sequences, face_sequences):
         x = self.output_block(x)
 
         if input_dim_size > 4:
-            x = paddle.split(x, int(x.shape[0] / B), axis=0)  # [(B, C, H, W)]
-            outputs = paddle.stack(x, axis=2)  # (B, C, T, H, W)
+            x = paddle.split(x, int(x.shape[0] / B), axis=0)
+            outputs = paddle.stack(x, axis=2)
         else:
             outputs = x
 
diff --git a/ppgan/models/mpr_model.py b/ppgan/models/mpr_model.py
index 1c2c7cb28a9c66..d88e8f11f441c9 100644
--- a/ppgan/models/mpr_model.py
+++ b/ppgan/models/mpr_model.py
@@ -77,12 +77,3 @@ def train_iter(self, optims=None):
     def forward(self):
         """Run forward pass; called by both functions <forward> and <test>."""
         pass
-
-
-def init_edvr_weight(net):
-    def reset_func(m):
-        if hasattr(m, 'weight') and (not isinstance(
-                m, (nn.BatchNorm, nn.BatchNorm2D))):
-            reset_parameters(m)
-
-    net.apply(reset_func)
diff --git a/ppgan/models/starganv2_model.py b/ppgan/models/starganv2_model.py
index 0598579694e369..f7d5e5e6aed068 100755
--- a/ppgan/models/starganv2_model.py
+++ b/ppgan/models/starganv2_model.py
@@ -1,3 +1,7 @@
+# code was heavily based on https://github.com/clovaai/stargan-v2
+# Users should be careful about adopting these functions in any commercial matters.
+# https://github.com/clovaai/stargan-v2#license
+
 from paddle.fluid.layers.nn import soft_relu
 
 from .base_model import BaseModel
diff --git a/ppgan/modules/caffevgg.py b/ppgan/modules/caffevgg.py
index bf40f5bb30208e..b0780899ecceb8 100644
--- a/ppgan/modules/caffevgg.py
+++ b/ppgan/modules/caffevgg.py
@@ -2,6 +2,7 @@ import paddle.nn as nn
 import numpy as np
 from ppgan.utils.download import get_path_from_url
+
 model_urls = {
     'caffevgg19': ('https://paddlegan.bj.bcebos.com/models/vgg19_no_fc.npy',
                    '8ea1ef2374f8684b6cea9f300849be81')
@@ -29,10 +30,13 @@ def __init__(self, output_index: int = 26) -> None:
         self.mean = mean.unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
 
     def _process(self, x):
-        rgb = (x * 0.5 + 0.5) * 255  # value to 255
+        # value to 255
+        rgb = (x * 0.5 + 0.5) * 255
+        # rgb to bgr
         bgr = paddle.stack((rgb[:, 2, :, :], rgb[:, 1, :, :], rgb[:, 0, :, :]),
-                           1)  # rgb to bgr
-        return bgr - self.mean  # vgg norm
+                           1)
+        # vgg norm
+        return bgr - self.mean
 
     def _forward_impl(self, x):
         x = self._process(x)
diff --git a/ppgan/modules/equalized.py b/ppgan/modules/equalized.py
index 7d2eef17ba6feb..2ef60e66846d65 100644
--- a/ppgan/modules/equalized.py
+++ b/ppgan/modules/equalized.py
@@ -12,6 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# code was heavily based on https://github.com/rosinality/stylegan2-pytorch
+# MIT License
+# Copyright (c) 2019 Kim Seonghyeon
+
 import math
 import paddle
 import paddle.nn as nn
diff --git a/ppgan/modules/fused_act.py b/ppgan/modules/fused_act.py
index d1bc584fc5fa0b..0bf89f00d0aac7 100644
--- a/ppgan/modules/fused_act.py
+++ b/ppgan/modules/fused_act.py
@@ -12,37 +12,40 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# code was heavily based on https://github.com/rosinality/stylegan2-pytorch
+# MIT License
+# Copyright (c) 2019 Kim Seonghyeon
+
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-
-
+
+
 class FusedLeakyReLU(nn.Layer):
-    def __init__(self, channel, bias=True, negative_slope=0.2, scale=2 ** 0.5):
+    def __init__(self, channel, bias=True, negative_slope=0.2, scale=2**0.5):
         super().__init__()
-
+
         if bias:
-            self.bias = self.create_parameter((channel,), default_initializer=nn.initializer.Constant(0.0))
-
+            self.bias = self.create_parameter(
+                (channel, ), default_initializer=nn.initializer.Constant(0.0))
+
         else:
             self.bias = None
-
+
         self.negative_slope = negative_slope
         self.scale = scale
-
+
     def forward(self, input):
-        return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale)
-
-
-def fused_leaky_relu(input, bias=None, negative_slope=0.2, scale=2 ** 0.5):
+        return fused_leaky_relu(input, self.bias, self.negative_slope,
+                                self.scale)
+
+
+def fused_leaky_relu(input, bias=None, negative_slope=0.2, scale=2**0.5):
     if bias is not None:
         rest_dim = [1] * (len(input.shape) - len(bias.shape) - 1)
-        return (
-            F.leaky_relu(
-                input + bias.reshape((1, bias.shape[0], *rest_dim)), negative_slope=0.2
-            )
-            * scale
-        )
-
+        return (F.leaky_relu(input + bias.reshape(
+            (1, bias.shape[0], *rest_dim)),
+                             negative_slope=0.2) * scale)
+
     else:
         return F.leaky_relu(input, negative_slope=0.2) * scale
diff --git a/ppgan/modules/upfirdn2d.py b/ppgan/modules/upfirdn2d.py
index ac34a889b279a1..ca5972d93cddcf 100644
--- a/ppgan/modules/upfirdn2d.py
+++ b/ppgan/modules/upfirdn2d.py
@@ -12,6 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# code was heavily based on https://github.com/rosinality/stylegan2-pytorch
+# MIT License
+# Copyright (c) 2019 Kim Seonghyeon
+
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
diff --git a/ppgan/modules/wing.py b/ppgan/modules/wing.py
index 6b583db4c7d58f..4cdc1826aef4b1 100755
--- a/ppgan/modules/wing.py
+++ b/ppgan/modules/wing.py
@@ -1,8 +1,6 @@
-"""
-StarGAN v2
-Copyright (c) 2020-present NAVER Corp.
-
-"""
+# code was heavily based on https://github.com/clovaai/stargan-v2
+# Users should be careful about adopting these functions in any commercial matters.
+# https://github.com/clovaai/stargan-v2#license
 
 from collections import namedtuple
 from copy import deepcopy
@@ -25,9 +23,16 @@ def __init__(self, num_modules, depth, num_features, first_one=False):
         self.num_modules = num_modules
         self.depth = depth
         self.features = num_features
-        self.coordconv = CoordConvTh(64, 64, True, True, 256, first_one,
+        self.coordconv = CoordConvTh(64,
+                                     64,
+                                     True,
+                                     True,
+                                     256,
+                                     first_one,
                                      out_channels=256,
-                                     kernel_size=1, stride=1, padding=0)
+                                     kernel_size=1,
+                                     stride=1,
+                                     padding=0)
         self._generate_network(self.depth)
 
     def _generate_network(self, level):
@@ -68,14 +73,19 @@ def __init__(self, height=64, width=64, with_r=False, with_boundary=False):
         self.with_boundary = with_boundary
 
         with paddle.no_grad():
-            x_coords = paddle.arange(height).unsqueeze(1).expand((height, width)).astype('float32')
-            y_coords = paddle.arange(width).unsqueeze(0).expand((height, width)).astype('float32')
+            x_coords = paddle.arange(height).unsqueeze(1).expand(
+                (height, width)).astype('float32')
+            y_coords = paddle.arange(width).unsqueeze(0).expand(
+                (height, width)).astype('float32')
             x_coords = (x_coords / (height - 1)) * 2 - 1
             y_coords = (y_coords / (width - 1)) * 2 - 1
-            coords = paddle.stack([x_coords, y_coords], axis=0)  # (2, height, width)
+            coords = paddle.stack([x_coords, y_coords],
+                                  axis=0)  # (2, height, width)
 
             if self.with_r:
-                rr = paddle.sqrt(paddle.pow(x_coords, 2) + paddle.pow(y_coords, 2))  # (height, width)
+                rr = paddle.sqrt(
+                    paddle.pow(x_coords, 2) +
+                    paddle.pow(y_coords, 2))  # (height, width)
                 rr = (rr / paddle.max(rr)).unsqueeze(0)
                 coords = paddle.concat([coords, rr], axis=0)
 
@@ -92,9 +102,12 @@ def forward(self, x, heatmap=None):
         if self.with_boundary and heatmap is not None:
             boundary_channel = paddle.clip(heatmap[:, -1:, :, :], 0.0, 1.0)
             zero_tensor = paddle.zeros_like(self.x_coords)
-            xx_boundary_channel = paddle.where(boundary_channel > 0.05, self.x_coords, zero_tensor)
-            yy_boundary_channel = paddle.where(boundary_channel > 0.05, self.y_coords, zero_tensor)
-            coords = paddle.concat([coords, xx_boundary_channel, yy_boundary_channel], axis=1)
+            xx_boundary_channel = paddle.where(boundary_channel > 0.05,
+                                               self.x_coords, zero_tensor)
+            yy_boundary_channel = paddle.where(boundary_channel > 0.05,
+                                               self.y_coords, zero_tensor)
+            coords = paddle.concat(
+                [coords, xx_boundary_channel, yy_boundary_channel], axis=1)
 
         x_and_coords = paddle.concat([x, coords], axis=1)
         return x_and_coords
@@ -102,8 +115,15 @@ def forward(self, x, heatmap=None):
 
 class CoordConvTh(nn.Layer):
     """CoordConv layer as in the paper."""
-    def __init__(self, height, width, with_r, with_boundary,
-                 in_channels, first_one=False, *args, **kwargs):
+    def __init__(self,
+                 height,
+                 width,
+                 with_r,
+                 with_boundary,
+                 in_channels,
+                 first_one=False,
+                 *args,
+                 **kwargs):
         super(CoordConvTh, self).__init__()
         self.addcoords = AddCoordsTh(height, width, with_r, with_boundary)
         in_channels += 2
@@ -124,7 +144,12 @@ class ConvBlock(nn.Layer):
     def __init__(self, in_planes, out_planes):
         super(ConvBlock, self).__init__()
         self.bn1 = nn.BatchNorm2D(in_planes)
-        conv3x3 = partial(nn.Conv2D, kernel_size=3, stride=1, padding=1, bias_attr=False, dilation=1)
+        conv3x3 = partial(nn.Conv2D,
+                          kernel_size=3,
+                          stride=1,
+                          padding=1,
+                          bias_attr=False,
+                          dilation=1)
         self.conv1 = conv3x3(in_planes, int(out_planes / 2))
         self.bn2 = nn.BatchNorm2D(int(out_planes / 2))
         self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4))
@@ -133,9 +158,9 @@ def __init__(self, in_planes, out_planes):
 
         self.downsample = None
         if in_planes != out_planes:
-            self.downsample = nn.Sequential(nn.BatchNorm2D(in_planes),
-                                            nn.ReLU(True),
-                                            nn.Conv2D(in_planes, out_planes, 1, 1, bias_attr=False))
+            self.downsample = nn.Sequential(
+                nn.BatchNorm2D(in_planes), nn.ReLU(True),
+                nn.Conv2D(in_planes, out_planes, 1, 1, bias_attr=False))
 
     def forward(self, x):
         residual = x
@@ -168,7 +193,7 @@ def normalize(x, eps=1e-6):
     """Apply min-max normalization."""
     # x = x.contiguous()
     N, C, H, W = x.shape
-    x_ = paddle.reshape(x, (N*C, -1))
+    x_ = paddle.reshape(x, (N * C, -1))
     max_val = paddle.max(x_, axis=1, keepdim=True)[0]
     min_val = paddle.min(x_, axis=1, keepdim=True)[0]
     x_ = (x_ - min_val) / (max_val - min_val + eps)
@@ -193,14 +218,14 @@ def shift(x, N):
     N = abs(N)
     _, _, H, W = x.shape
     head = np.arange(N)
-    tail = np.arange(H-N)
+    tail = np.arange(H - N)
 
     if up:
-        head = np.arange(H-N)+N
+        head = np.arange(H - N) + N
         tail = np.arange(N)
     else:
-        head = np.arange(N) + (H-N)
-        tail = np.arange(H-N)
+        head = np.arange(N) + (H - N)
+        tail = np.arange(H - N)
 
     # permutation indices
     perm = np.concatenate([head, tail])
@@ -231,29 +256,31 @@ def preprocess(x):
     sw = H // 256
 
     operations = Munch(chin=OPPAIR(0, 3),
-                       eyebrows=OPPAIR(-7*sw, 2),
-                       nostrils=OPPAIR(8*sw, 4),
-                       lipupper=OPPAIR(-8*sw, 4),
-                       liplower=OPPAIR(8*sw, 4),
-                       lipinner=OPPAIR(-2*sw, 3))
+                       eyebrows=OPPAIR(-7 * sw, 2),
+                       nostrils=OPPAIR(8 * sw, 4),
+                       lipupper=OPPAIR(-8 * sw, 4),
+                       liplower=OPPAIR(8 * sw, 4),
+                       lipinner=OPPAIR(-2 * sw, 3))
 
     for part, ops in operations.items():
         start, end = index_map[part]
         x[:, start:end] = resize(shift(x[:, start:end], ops.shift), ops.resize)
 
-    zero_out = paddle.concat([paddle.arange(0, index_map.chin.start),
-                              paddle.arange(index_map.chin.end, 33),
-                              paddle.to_tensor([index_map.eyebrowsedges.start,
-                                                index_map.eyebrowsedges.end,
-                                                index_map.lipedges.start,
-                                                index_map.lipedges.end])])
+    zero_out = paddle.concat([
+        paddle.arange(0, index_map.chin.start),
+        paddle.arange(index_map.chin.end, 33),
+        paddle.to_tensor([
+            index_map.eyebrowsedges.start, index_map.eyebrowsedges.end,
+            index_map.lipedges.start, index_map.lipedges.end
+        ])
+    ])
     x = x.numpy()
     zero_out = zero_out.numpy()
     x[:, zero_out] = 0
     x = paddle.to_tensor(x)
 
     start, end = index_map.nose
-    x[:, start+1:end] = shift(x[:, start+1:end], 4*sw)
+    x[:, start + 1:end] = shift(x[:, start + 1:end], 4 * sw)
     x[:, start:end] = resize(x[:, start:end], 1)
 
     start, end = index_map.eyes
@@ -264,8 +291,10 @@ def preprocess(x):
 
     # Second-level mask
     x2 = deepcopy(x)
     x2[:, index_map.chin.start:index_map.chin.end] = 0  # start:end was 0:33
-    x2[:, index_map.lipedges.start:index_map.lipinner.end] = 0  # start:end was 76:96
-    x2[:, index_map.eyebrows.start:index_map.eyebrows.end] = 0  # start:end was 33:51
+    x2[:, index_map.lipedges.start:index_map.lipinner.
+       end] = 0  # start:end was 76:96
+    x2[:, index_map.eyebrows.
+       end] = 0  # start:end was 33:51
 
     x = paddle.sum(x, axis=1, keepdim=True)  # (N, 1, H, W)
     x2 = paddle.sum(x2, axis=1, keepdim=True)  # mask without faceline and mouth
diff --git a/ppgan/utils/visual.py b/ppgan/utils/visual.py
index dccb7a9d62f8fc..6982634661b896 100644
--- a/ppgan/utils/visual.py
+++ b/ppgan/utils/visual.py
@@ -45,18 +45,23 @@ def make_grid(tensor, nrow=8, normalize=False, range=None, scale_each=False):
     if isinstance(tensor, list):
         tensor = paddle.stack(tensor, 0)
 
-    if tensor.dim() == 2:  # single image H x W
+    # single image H x W
+    if tensor.dim() == 2:
         tensor = tensor.unsqueeze(0)
-    if tensor.dim() == 3:  # single image
-        if tensor.shape[0] == 1:  # if single-channel, convert to 3-channel
+    # single image
+    if tensor.dim() == 3:
+        # if single-channel, convert to 3-channel
+        if tensor.shape[0] == 1:
             tensor = paddle.concat([tensor, tensor, tensor], 0)
         tensor = tensor.unsqueeze(0)
 
-    if tensor.dim() == 4 and tensor.shape[1] == 1:  # single-channel images
+    # single-channel images
+    if tensor.dim() == 4 and tensor.shape[1] == 1:
         tensor = paddle.concat([tensor, tensor, tensor], 1)
 
     if normalize is True:
-        tensor = tensor.astype(tensor.dtype)  # avoid modifying tensor in-place
+        # avoid modifying tensor in-place
+        tensor = tensor.astype(tensor.dtype)
         if range is not None:
             assert isinstance(range, tuple), \
                 "range has to be a tuple (min, max) if specified. min and max are numbers"
@@ -72,7 +77,8 @@ def norm_range(t, range):
             norm_ip(t, float(t.min()), float(t.max()))
 
     if scale_each is True:
-        for t in tensor:  # loop over mini-batch dimension
+        # loop over mini-batch dimension
+        for t in tensor:
             norm_range(t, range)
     else:
         norm_range(tensor, range)
@@ -103,27 +109,31 @@ def tensor2img(input_image, min_max=(-1., 1.), image_num=1, imtype=np.uint8):
     """Converts a Tensor array into a numpy image array.
 
     Parameters:
-        input_image (tensor) -- the input image tensor array
-        image_num (int) -- the convert iamge numbers
-        imtype (type) -- the desired type of the converted numpy array
+        input_image (tensor): the input image tensor array
+        image_num (int): the number of images to convert
+        imtype (type): the desired type of the converted numpy array
     """
     def processing(img, transpose=True):
         """Processing one numpy image.
 
         Parameters:
-            im (tensor) -- the input image numpy array
+            img (tensor): the input image numpy array
         """
-        if img.shape[0] == 1:  # grayscale to RGB
+        # grayscale to RGB
+        if img.shape[0] == 1:
             img = np.tile(img, (3, 1, 1))
         img = img.clip(min_max[0], min_max[1])
         img = (img - min_max[0]) / (min_max[1] - min_max[0])
         if imtype == np.uint8:
-            img = img * 255.0  # scaling
-            img = np.transpose(img, (1, 2, 0)) if transpose else img  # tranpose
+            # scaling
+            img = img * 255.0
+            # transpose
+            img = np.transpose(img, (1, 2, 0)) if transpose else img
         return img
 
     if not isinstance(input_image, np.ndarray):
-        image_numpy = input_image.numpy()  # convert it into a numpy array
+        # convert it into a numpy array
+        image_numpy = input_image.numpy()
         ndim = image_numpy.ndim
         if ndim == 4:
             image_numpy = image_numpy[0:image_num]
@@ -144,7 +154,8 @@ def processing(img, transpose=True):
         image_numpy = np.stack(
             [processing(im, transpose=False) for im in image_numpy])
-    else:  # if it is a numpy array, do nothing
+    else:
+        # if it is a numpy array, do nothing
         image_numpy = input_image
     image_numpy = image_numpy.round()
     return image_numpy.astype(imtype)
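
Supplementary note (not part of the patch): two of the routines reformatted above are easier to review with their math restated in isolation. First, the AdaLIN forward pass: instance-norm and layer-norm statistics are blended by a learnable rho, then a style-derived affine transform (gamma, beta) is applied. A minimal standalone sketch, assuming Paddle 2.x; the shapes and values below are illustrative only, not taken from the patch:

    import paddle

    N, C, H, W = 2, 8, 4, 4
    eps = 1e-5
    x = paddle.rand([N, C, H, W])
    gamma = paddle.rand([N, C])           # produced from style features in SoftAdaLIN
    beta = paddle.rand([N, C])
    rho = paddle.full([1, C, 1, 1], 0.9)  # learnable parameter in AdaLIN

    # Instance-norm statistics: per sample, per channel.
    in_mean = paddle.mean(x, axis=[2, 3], keepdim=True)
    in_var = paddle.var(x, axis=[2, 3], keepdim=True)
    out_in = (x - in_mean) / paddle.sqrt(in_var + eps)

    # Layer-norm statistics: per sample, across all channels.
    ln_mean = paddle.mean(x, axis=[1, 2, 3], keepdim=True)
    ln_var = paddle.var(x, axis=[1, 2, 3], keepdim=True)
    out_ln = (x - ln_mean) / paddle.sqrt(ln_var + eps)

    # rho near 1 favours instance norm; rho near 0 favours layer norm.
    rho_ = rho.expand([N, -1, -1, -1])
    out = rho_ * out_in + (1. - rho_) * out_ln
    out = out * gamma.unsqueeze(2).unsqueeze(3) + beta.unsqueeze(2).unsqueeze(3)
    print(out.shape)  # [2, 8, 4, 4]

Second, fused_leaky_relu in ppgan/modules/fused_act.py: the bias is reshaped so it broadcasts over every axis after the channel axis, and the activation output is scaled by a constant gain (sqrt(2) by default). A sketch under the same assumptions; fused_leaky_relu_ref is a hypothetical name for illustration, and unlike the patched function it passes negative_slope through rather than pinning it to 0.2:

    import paddle
    import paddle.nn.functional as F

    def fused_leaky_relu_ref(x, bias, negative_slope=0.2, scale=2**0.5):
        # Reshape bias from (C,) to (1, C, 1, ..., 1) so it broadcasts
        # across the batch and spatial axes, then activate and apply the gain.
        rest_dim = [1] * (len(x.shape) - len(bias.shape) - 1)
        return F.leaky_relu(x + bias.reshape((1, bias.shape[0], *rest_dim)),
                            negative_slope=negative_slope) * scale

    x = paddle.rand([2, 8, 4, 4])
    bias = paddle.zeros([8])
    print(fused_leaky_relu_ref(x, bias).shape)  # [2, 8, 4, 4]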