# transformer_ffhq.yaml
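# Note on the layout (not stated in the file itself, so treat it as an assumption):
# each `target:` names the class to instantiate and the sibling `params:` block holds
# its constructor arguments, following the usual instantiate-from-config pattern.
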
model:
  target: image_synthesis.modeling.models.masked_image_inpainting_transformer_in_feature.MaskedImageInpaintingTransformer
  params:
    n_layer: 30 # 35
    content_seq_len: 1024
    n_embd: 512 # embedding dim
    n_head: 8
    num_token: 512
    embd_pdrop: 0.0
    attn_pdrop: 0.0
    resid_pdrop: 0.0
    attn_content_with_mask: False
    mlp_hidden_times: 4
    block_activate: GELU2
    random_quantize: 0.3
    weight_decay: 0.01
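
    # Frozen P-VQVAE image codec (this description is inferred from the values in the
    # block below, not stated elsewhere in the file): 256x256 inputs are encoded with
    # stride 8 into a 32x32 token grid, i.e. the 1024 tokens of content_seq_len above;
    # weights come from the pretrained checkpoint and stay fixed (trainable: False).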
    content_codec_config:
      target: image_synthesis.modeling.codecs.image_codec.patch_vqgan.PatchVQGAN
      params:
        ckpt_path: OUTPUT/pvqvae_ffhq/checkpoint/last.pth
        trainable: False
        token_shape: [32, 32]
        combine_rec_and_gt: False # TODO False
        quantizer_config:
          target: image_synthesis.modeling.codecs.image_codec.patch_vqgan.VectorQuantizer
          params:
            n_e: 1024
            e_dim: 256
            masked_embed_start: 512
            embed_ema: True
            get_embed_type: retrive
            distance_type: euclidean
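
        # Codebook layout (inferred from n_e and masked_embed_start, so treat it as an
        # assumption): 1024 entries in total, the first 512 for visible patches and the
        # remaining 512 from masked_embed_start onward reserved for masked patches,
        # matching num_token: 512 in the transformer above.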
        encoder_config:
          target: image_synthesis.modeling.codecs.image_codec.patch_vqgan.PatchEncoder2
          params:
            in_ch: 3
            res_ch: 256
            out_ch: 256
            num_res_block: 8
            res_block_bottleneck: 2
            stride: 8
        decoder_config:
          target: image_synthesis.modeling.codecs.image_codec.patch_vqgan.PatchConvDecoder2
          params:
            in_ch: 256
            out_ch: 3
            res_ch: 256
            num_res_block: 8
            res_block_bottleneck: 2
            stride: 8
            up_layer_with_image: true
            encoder_downsample_layer: conv

solver:
  find_unused_parameters: False
  base_lr: 0.0
  adjust_lr: none # do not adjust the lr according to the total batch size
  max_epochs: 250
  save_epochs: 5
  validation_epochs: 1
  sample_iterations: 10000 # 5000 # how many iterations between two rounds of sampling
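
  # Learning-rate schedule (as suggested by the scheduler class and its parameters):
  # the lr starts from base_lr (0.0), is warmed up to warmup_lr over the first
  # `warmup` iterations, then cosine-annealed down to min_lr.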
  optimizers_and_schedulers: # a list of configs, so several optimizers and schedulers can be set up
  - name: transformer # default is None
    optimizer:
      target: torch.optim.AdamW
      params:
        betas: !!python/tuple [0.9, 0.95]
    scheduler:
      step_iteration: 1
      target: image_synthesis.engine.lr_scheduler.CosineAnnealingLRWithWarmup
      params:
        min_lr: 1.0e-5
        warmup_lr: 3.0e-4 # the lr reached at the end of warmup
        warmup: 2000

dataloader:
  data_root: data
  batch_size: 16
  num_workers: 1
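
  # Mask settings below (this reading is inferred from the key names, so treat it as an
  # assumption): with probability use_provided_mask a mask is drawn from the provided
  # irregular-mask set, otherwise a random stroke mask is generated from stroken_mask_params;
  # mask_low_to_high is the chance of sampling the mask at mask_low_size and upsampling it.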
  train_datasets:
    - target: image_synthesis.data.image_list_dataset.ImageListDataset
      params:
        name: ffhq
        image_list_file: data/ffhqtrain_69k.txt
        provided_mask_name: irregular-mask/testing_mask_dataset
        use_provided_mask_ratio: [0.3333333, 1.0]
        use_provided_mask: 0.8
        mask: 1.0
        mask_low_to_high: 0.1
        mask_low_size: [32, 32]
        multi_image_mask: False
        return_data_keys: [image, mask]
        stroken_mask_params:
          maxVertex: 10
          minVertex: 5
          maxLength: 100
          maxBrushWidth: 30
          minBrushWidth: 10
          keep_ratio: [0.3, 0.6]
          min_area: 64 # 8*8; use the receptive field as the minimum masked area
        im_preprocessor_config:
          target: image_synthesis.data.utils.image_preprocessor.SimplePreprocessor
          params:
            size: [256, 256]
            smallest_max_size: 256
            random_crop: True
            horizon_flip: True
  validation_datasets:
    - target: image_synthesis.data.image_list_dataset.ImageListDataset
      params:
        name: ffhq
        image_list_file: data/ffhqvalidation_1k.txt
        provided_mask_name: irregular-mask/testing_mask_dataset
        use_provided_mask_ratio: [0.4, 1.0]
        use_provided_mask: 0.8
        mask: 1.0
        mask_low_to_high: 0.0
        mask_low_size: [32, 32]
        multi_image_mask: False
        return_data_keys: [image, mask]
        stroken_mask_params:
          maxVertex: 10
          minVertex: 5
          maxLength: 100
          maxBrushWidth: 30
          minBrushWidth: 10
          keep_ratio: [0.3, 0.6]
          min_area: 64 # 8*8; use the receptive field as the minimum masked area
        im_preprocessor_config:
          target: image_synthesis.data.utils.image_preprocessor.SimplePreprocessor
          params:
            size: [256, 256]
            smallest_max_size: 256