
Commit b82fde8

full topk function

1 parent aa41a17 commit b82fde8

File tree: 4 files changed, +28 -8 lines

train.py (+3 -3)

@@ -250,9 +250,9 @@ def setup_training_loop_kwargs(
 
     if topk is not None:
         assert isinstance(topk, float)
-        args.loss_args.G_top_k = True
-        args.loss_args.G_top_k_gamma = topk
-        args.loss_args.G_top_k_frac = 0.5
+        args.loss_kwargs.G_top_k = True
+        args.loss_kwargs.G_top_k_gamma = topk
+        args.loss_kwargs.G_top_k_frac = 0.5
 
     # ---------------------------------------------------
     # Discriminator augmentation: aug, p, target, augpipe
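In short, the --topk value (asserted to be a float) becomes G_top_k_gamma, the per-"epoch" decay base of the fraction of fake samples kept for the generator loss, and G_top_k_frac = 0.5 floors that fraction at half the batch. An illustrative calculation of how the two interact (the 0.99 here is a hypothetical flag value, not a default from this commit):

    # Illustrative only: kept fraction = max(gamma ** epochs, frac).
    gamma, frac = 0.99, 0.5
    for epochs in (0, 25, 50, 100):
        print(epochs, max(gamma ** epochs, frac))   # -> 1.0, ~0.78, ~0.61, 0.5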

training/loss.py (+17)

@@ -34,6 +34,10 @@ def __init__(self, device, G_mapping, G_synthesis, D, augment_pipe=None, style_m
         self.pl_decay = pl_decay
         self.pl_weight = pl_weight
         self.pl_mean = torch.zeros([], device=device)
+        self.G_top_k = G_top_k
+        self.G_top_k_gamma = G_top_k_gamma
+        self.G_top_k_frac = G_top_k_frac
+
 
     def run_G(self, z, c, sync):
         with misc.ddp_sync(self.G_mapping, sync):

@@ -68,6 +72,19 @@ def accumulate_gradients(self, phase, real_img, real_c, gen_z, gen_c, sync, gain
                 gen_logits = self.run_D(gen_img, gen_c, sync=False)
                 training_stats.report('Loss/scores/fake', gen_logits)
                 training_stats.report('Loss/signs/fake', gen_logits.sign())
+
+                # top-k function based on: https://github.com/dvschultz/stylegan2-ada/blob/main/training/loss.py#L102
+                if G_top_k:
+                    D_fake_scores = gen_logits
+                    k_frac = torch.max(self.G_top_k_gamma ** self.G_mapping.epochs, self.G_top_k_frac)
+                    print(k_frac)
+                    k = (torch.ceil(minibatch_size.type(torch.float) * k_frac)).type(torch.int)
+                    print(k)
+                    lowest_k_scores, _ = torch.topk(-torch.squeeze(D_fake_scores), k=k) # want smallest probabilities not largest
+                    print(lowest_k_scores)
+                    gen_logits = torch.expand(-lowest_k_scores, axis=1)
+                    print(gen_logits)
+
                 loss_Gmain = torch.nn.functional.softplus(-gen_logits) # -log(sigmoid(gen_logits))
                 training_stats.report('Loss/G/loss', loss_Gmain)
             with torch.autograd.profiler.record_function('Gmain_backward'):
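As committed, the new block has a few rough edges: it tests G_top_k rather than self.G_top_k, minibatch_size is not among accumulate_gradients' parameters (see the hunk header above; gen_z.shape[0] would give the batch size), torch.max is applied to two plain floats, and torch.expand is not a PyTorch function (unsqueeze would restore the dropped dimension). A minimal, self-contained sketch of the intended selection step, keeping the k smallest discriminator scores as the in-line comment asks for (some top-k implementations keep the highest-scoring fakes instead):

    import math
    import torch

    def topk_fake_logits(gen_logits, epochs, gamma, min_frac):
        """Sketch only: keep the k smallest fake logits for the generator loss.
        Assumes gen_logits of shape [batch_size, 1]; epochs, gamma (G_top_k_gamma)
        and min_frac (G_top_k_frac) are plain floats."""
        batch_size = gen_logits.shape[0]
        k_frac = max(gamma ** epochs, min_frac)         # anneal kept fraction toward min_frac
        k = int(math.ceil(batch_size * k_frac))
        # largest=False selects the k smallest scores directly, replacing the
        # negate / top-k / negate-again dance; unsqueeze restores shape [k, 1].
        kept, _ = torch.topk(gen_logits.squeeze(1), k=k, largest=False)
        return kept.unsqueeze(1)

Inside accumulate_gradients this would sit behind `if self.G_top_k:` and feed the reduced gen_logits into the existing softplus line unchanged.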

training/networks.py (+4 -3)

@@ -484,8 +484,6 @@ def __init__(self,
         mapping_kwargs = {}, # Arguments for MappingNetwork.
         synthesis_kwargs = {}, # Arguments for SynthesisNetwork.
         epochs = 0., # Track epoch count for top-k
-        nimg = 0,
-        total_kimg = 25000,
     ):
         super().__init__()
         self.z_dim = z_dim

@@ -496,13 +494,16 @@
         self.synthesis = SynthesisNetwork(w_dim=w_dim, img_resolution=img_resolution, img_channels=img_channels, **synthesis_kwargs)
         self.num_ws = self.synthesis.num_ws
         self.mapping = MappingNetwork(z_dim=z_dim, c_dim=c_dim, w_dim=w_dim, num_ws=self.num_ws, **mapping_kwargs)
-        self.epochs = float(100 * nimg / (total_kimg * 1000))
+        self.epochs = 0.
 
     def forward(self, z, c, truncation_psi=1, truncation_cutoff=None, **synthesis_kwargs):
         ws = self.mapping(z, c, truncation_psi=truncation_psi, truncation_cutoff=truncation_cutoff)
         img = self.synthesis(ws, **synthesis_kwargs)
         return img
 
+    def update_epochs(self, epoch):
+        self.epochs = epoch
+
 #----------------------------------------------------------------------------
 
 @persistence.persistent_class
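One thing worth flagging: the loss block above reads self.G_top_k_gamma ** self.G_mapping.epochs, whereas this commit stores the counter on the Generator wrapper. If, as in the upstream training loop, G_mapping is G.mapping (the MappingNetwork), that attribute lookup will not see G.epochs unless the mapping network tracks its own counter elsewhere in this fork. A hypothetical variant of the new method that would also cover that path (not part of this commit):

    # Hypothetical (not in this commit): mirror the counter onto the mapping
    # sub-network so a self.G_mapping.epochs lookup in the loss sees it too.
    def update_epochs(self, epoch):
        self.epochs = epoch
        self.mapping.epochs = epoch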

training/training_loop.py (+4 -2)

@@ -148,11 +148,11 @@ def training_loop(
     if rank == 0:
         print('Constructing networks...')
     common_kwargs = dict(c_dim=training_set.label_dim, img_resolution=training_set.resolution, img_channels=training_set.num_channels)
-    G = dnnlib.util.construct_class_by_name(**G_kwargs, **common_kwargs, nimg, total_kimg).train().requires_grad_(False).to(device) # subclass of torch.nn.Module
+    G = dnnlib.util.construct_class_by_name(**G_kwargs, **common_kwargs).train().requires_grad_(False).to(device) # subclass of torch.nn.Module
     D = dnnlib.util.construct_class_by_name(**D_kwargs, **common_kwargs).train().requires_grad_(False).to(device) # subclass of torch.nn.Module
     G_ema = copy.deepcopy(G).eval()
 
-    G.epochs = float(100 * nimg / (total_kimg * 1000)) # 100 total top k "epochs" in total_kimg
+    G.update_epochs( float(100 * nimg / (total_kimg * 1000)) ) # 100 total top k "epochs" in total_kimg
     print('starting G epochs: ',G.epochs)
 
     # Resume from existing pickle.

@@ -275,6 +275,8 @@ def training_loop(
         if batch_idx % phase.interval != 0:
             continue
 
+        G.update_epochs( float(100 * nimg / (total_kimg * 1000)) ) # 100 total top k "epochs" in total_kimg
+
         # Initialize gradient accumulation.
         if phase.start_event is not None:
             phase.start_event.record(torch.cuda.current_stream(device))
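The formula used in both hunks maps images seen so far onto a 0-100 "epoch" scale spanning the whole run, so the top-k anneal finishes exactly at total_kimg. A worked example, with total_kimg = 25000 used purely as an illustrative setting:

    # Worked example of the epoch counter: epochs runs linearly from 0 to 100.
    total_kimg = 25000
    for nimg in (0, 6_250_000, 12_500_000, 25_000_000):
        print(nimg, float(100 * nimg / (total_kimg * 1000)))   # 0.0, 25.0, 50.0, 100.0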
