From 0dc03fadeb06fcb84af20e8794da9b85974e5a21 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 27 Feb 2022 00:40:37 +0100 Subject: [PATCH 01/50] Add templates, plugins and models for GR using VAE. --- avalanche/models/generator.py | 153 ++++++++++++++++++ avalanche/training/plugins/__init__.py | 1 + .../training/plugins/generative_replay.py | 122 ++++++++++++++ .../training/supervised/strategy_wrappers.py | 137 ++++++++++++++++ 4 files changed, 413 insertions(+) create mode 100644 avalanche/models/generator.py create mode 100644 avalanche/training/plugins/generative_replay.py diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py new file mode 100644 index 000000000..c70821b53 --- /dev/null +++ b/avalanche/models/generator.py @@ -0,0 +1,153 @@ +from matplotlib import transforms +import torch +import torch.nn as nn +from collections import OrderedDict +from torchvision import transforms + +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +class Flatten(nn.Module): + def __init__(self): + super(Flatten, self).__init__() + def forward(self, x): + batch_size = x.shape[0] + return x.view(batch_size, -1) + +class MLP(nn.Module): + def __init__(self, hidden_size, last_activation = True): + super(MLP, self).__init__() + q = [] + for i in range(len(hidden_size)-1): + in_dim = hidden_size[i] + out_dim = hidden_size[i+1] + q.append(("Linear_%d" % i, nn.Linear(in_dim, out_dim))) + if (i < len(hidden_size)-2) or ((i == len(hidden_size) - 2) and (last_activation)): + q.append(("BatchNorm_%d" % i, nn.BatchNorm1d(out_dim))) + q.append(("ReLU_%d" % i, nn.ReLU(inplace=True))) + self.mlp = nn.Sequential(OrderedDict(q)) + def forward(self, x): + return self.mlp(x) + +class Encoder(nn.Module): + def __init__(self, shape, nhid = 16, ncond = 0): + super(Encoder, self).__init__() + c, h, w = shape + ww = ((w-8)//2 - 4)//2 + hh = ((h-8)//2 - 4)//2 + self.encode = nn.Sequential( + Flatten(), + nn.Linear(in_features=28*28, out_features=400), + nn.BatchNorm1d(400), + nn.LeakyReLU(), + MLP([400, 128]) + ) + + def forward(self, x, y = None): + x = self.encode(x) + return x + if (y is None): + return self.calc_mean(x), self.calc_logvar(x) + else: + return self.calc_mean(torch.cat((x, y), dim=1)), self.calc_logvar(torch.cat((x, y), dim=1)) + +class Decoder(nn.Module): + def __init__(self, shape, nhid = 16, ncond = 0): + super(Decoder, self).__init__() + c, w, h = shape + self.shape = shape + self.decode = nn.Sequential(MLP([nhid+ncond, 64, 128, 256, c*w*h], last_activation = False), nn.Sigmoid()) + self.invTrans = transforms.Compose([ + transforms.Normalize((0.1307,), (0.3081,)) + ]) + def forward(self, z, y = None): + c, w, h = self.shape + if (y is None): + return self.invTrans(self.decode(z).view(-1, c, w, h)) + else: + return self.invTrans(self.decode(torch.cat((z, y), dim=1)).view(-1, c, w, h)) + +class Solver(nn.Module): + def __init__(self, vae, nhid = 16): + super().__init__() + self.input_dim = nhid + self.vae = vae + + def forward(self, x): + self.vae.encoder(x) + +class VAE(nn.Module): + def __init__(self, shape, nhid = 16, n_classes=10): + super(VAE, self).__init__() + self.dim = nhid + self.encoder = Encoder(shape, nhid) + self.calc_mean = MLP([128, nhid], last_activation = False) + self.calc_logvar = MLP([128, nhid], last_activation = False) + self.classification = MLP([128, n_classes], last_activation = False) + self.decoder = Decoder(shape, nhid) + + def sampling(self, mean, logvar): + eps = torch.randn(mean.shape).to(device) + sigma = 0.5 * torch.exp(logvar) + 
return mean + eps * sigma + + # Orginial forward of VAE. We modify this to tie it in with Avalanche plugin syntax + #def forward(self, x): + # mean, logvar = self.encoder(x) + # z = self.sampling(mean, logvar) + # return self.decoder(z), mean, logvar + def forward(self, x): + return self.encoder(x) + + def generate(self, batch_size = None): + z = torch.randn((batch_size, self.dim)).to(device) if batch_size else torch.randn((1, self.dim)).to(device) + res = self.decoder(z) + if not batch_size: + res = res.squeeze(0) + return res + +class cVAE(nn.Module): + def __init__(self, shape, nclass, nhid = 16, ncond = 16): + super(cVAE, self).__init__() + self.dim = nhid + self.encoder = Encoder(shape, nhid, ncond = ncond) + self.calc_mean = MLP([128+ncond, nhid], last_activation = False) + self.calc_logvar = MLP([128+ncond, nhid], last_activation = False) + self.decoder = Decoder(shape, nhid, ncond = ncond) + self.label_embedding = nn.Embedding(nclass, ncond) + + def sampling(self, mean, logvar): + eps = torch.randn(mean.shape).to(device) + sigma = 0.5 * torch.exp(logvar) + return mean + eps * sigma + + def forward(self, x, y): + y = self.label_embedding(y) + mean, logvar = self.encoder(x, y) + z = self.sampling(mean, logvar) + return self.decoder(z, y), mean, logvar + + def generate(self, class_idx): + if (type(class_idx) is int): + class_idx = torch.tensor(class_idx) + class_idx = class_idx.to(device) + if (len(class_idx.shape) == 0): + batch_size = None + class_idx = class_idx.unsqueeze(0) + z = torch.randn((1, self.dim)).to(device) + else: + batch_size = class_idx.shape[0] + z = torch.randn((batch_size, self.dim)).to(device) + y = self.label_embedding(class_idx) + res = self.decoder(z, y) + if not batch_size: + res = res.squeeze(0) + return res + +# Loss functions +BCE_loss = nn.BCELoss(reduction = "sum") +MSE_loss = nn.MSELoss(reduction = "sum") +CE_loss = nn.CrossEntropyLoss() +def VAE_loss(X, X_hat, mean, logvar): + reconstruction_loss = MSE_loss(X_hat, X) + KL_divergence = 0.5 * torch.sum(-1 - logvar + torch.exp(logvar) + mean**2) + return reconstruction_loss + KL_divergence \ No newline at end of file diff --git a/avalanche/training/plugins/__init__.py b/avalanche/training/plugins/__init__.py index 8baa0d52c..f69d7256a 100644 --- a/avalanche/training/plugins/__init__.py +++ b/avalanche/training/plugins/__init__.py @@ -13,3 +13,4 @@ from .lfl import LFLPlugin from .early_stopping import EarlyStoppingPlugin from .lr_scheduling import LRSchedulerPlugin +from .generative_replay import GenerativeReplayPlugin, VAEPlugin, trainGeneratorPlugin diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py new file mode 100644 index 000000000..77295280e --- /dev/null +++ b/avalanche/training/plugins/generative_replay.py @@ -0,0 +1,122 @@ +from avalanche.benchmarks.utils.data_loader import ReplayDataLoader +from avalanche.benchmarks.utils import AvalancheDataset +from avalanche.core import SupervisedPlugin +from avalanche.training.templates.supervised import SupervisedTemplate +import torch + +class GenerativeReplayPlugin(SupervisedPlugin): + """ + Experience replay plugin. + + Handles an external memory filled with randomly selected + patterns and implementing `before_training_exp` and `after_training_exp` + callbacks. + The `before_training_exp` callback is implemented in order to use the + dataloader that creates mini-batches with examples from both training + data and external memory. 
The examples in the mini-batch is balanced + such that there are the same number of examples for each experience. + + The `after_training_exp` callback is implemented in order to add new + patterns to the external memory. + + The :mem_size: attribute controls the total number of patterns to be stored + in the external memory. + + :param batch_size: the size of the data batch. If set to `None`, it + will be set equal to the strategy's batch size. + :param batch_size_mem: the size of the memory batch. If + `task_balanced_dataloader` is set to True, it must be greater than or + equal to the number of tasks. If its value is set to `None` + (the default value), it will be automatically set equal to the + data batch size. + :param task_balanced_dataloader: if True, buffer data loaders will be + task-balanced, otherwise it will create a single dataloader for the + buffer samples. + :param storage_policy: The policy that controls how to add new exemplars + in memory + """ + + def __init__(self, generator, mem_size: int = 200, batch_size: int = None, + batch_size_mem: int = None, + task_balanced_dataloader: bool = False, + untrained_solver: bool = True): + super().__init__() + self.mem_size = mem_size + self.batch_size = batch_size + self.batch_size_mem = batch_size_mem + self.task_balanced_dataloader = task_balanced_dataloader + self.generator_strategy = generator + self.generator = generator.model + self.untrained_solver = untrained_solver + self.classes_until_now = [] + + def before_training_exp(self, strategy: "SupervisedTemplate", + num_workers: int = 0, shuffle: bool = True, + **kwargs): + """ + Dataloader to build batches containing examples from both memories and + the training dataset + """ + self.classes_until_now.append(strategy.experience.classes_in_this_experience) + + print("Classes so far: ", self.classes_until_now, len(self.classes_until_now)) + if self.untrained_solver: + # The solver needs to train on the first experience before it can label generated data + # as well as the generator needs to train first. + self.untrained_solver = False + return + #self.classes_until_now = [class_id for exp_classes in self.classes_until_now for class_id in exp_classes] + # Sample data from generator + memory = self.generator.generate(len(strategy.adapted_dataset)*(len(self.classes_until_now)-1)).to(strategy.device) + # Label the generated data using the current solver model + strategy.model.eval() + with torch.no_grad(): + memory_output = strategy.model(memory).argmax(dim=-1) + strategy.model.train() + # Create an AvalancheDataset from memory data and labels + memory = AvalancheDataset(torch.utils.data.TensorDataset(memory.detach().cpu(), memory_output.detach().cpu())) + + + batch_size = self.batch_size + if batch_size is None: + batch_size = strategy.train_mb_size + + batch_size_mem = self.batch_size_mem + if batch_size_mem is None: + batch_size_mem = strategy.train_mb_size + #Update strategies dataloader by mixing current experience's data with generated data. + strategy.dataloader = ReplayDataLoader( + strategy.adapted_dataset, + memory, + batch_size=batch_size, + batch_size_mem=batch_size_mem*(len(self.classes_until_now)-1), + task_balanced_dataloader=self.task_balanced_dataloader, + num_workers=num_workers, + shuffle=shuffle) + +class VAEPlugin(SupervisedPlugin): + + def after_forward( + self, strategy, *args, **kwargs + ): + # Forward call computes the representations in the latent space. 
They are stored at strategy.mb_output and can be used here + strategy.mean, strategy.logvar = strategy.model.calc_mean(strategy.mb_output), strategy.model.calc_logvar(strategy.mb_output) + z = strategy.model.sampling(strategy.mean, strategy.logvar) + strategy.mb_x_recon = strategy.model.decoder(z) + + def after_eval_forward( + self, strategy, *args, **kwargs + ): + # Forward call computes the representations in the latent space. They are stored at strategy.mb_output and can be used here + strategy.mean, strategy.logvar = strategy.model.calc_mean(strategy.mb_output), strategy.model.calc_logvar(strategy.mb_output) + z = strategy.model.sampling(strategy.mean, strategy.logvar) + strategy.mb_x_recon = strategy.model.decoder(z) + +class trainGeneratorPlugin(SupervisedPlugin): + def after_training_exp(self, strategy: "SupervisedTemplate", **kwargs): + print("Start training of Generator ... ") + #strategy.generator.train(strategy.dataloader) + strategy.plugins[1].generator_strategy.train(strategy.experience) + # Originally wanted to train directly on strategy.dataloader which already contains generated data + # However training requires an experience which has an attribute dataset with teh entire dataset. + # We there do the sampling step again diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 5ad1757c1..6012b963e 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -275,6 +275,143 @@ def __init__( **base_kwargs ) +class GenerativeReplay(SupervisedTemplate): + """Naive finetuning. + + The simplest (and least effective) Continual Learning strategy. Naive just + incrementally fine tunes a single model without employing any method + to contrast the catastrophic forgetting of previous knowledge. + This strategy does not use task identities. + + Naive is easy to set up and its results are commonly used to show the worst + performing baseline. + """ + + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_epochs: int = 1, + eval_mb_size: int = None, + device=None, + plugins: Optional[List[SupervisedPlugin]] = None, #Optional + evaluator: EvaluationPlugin = default_evaluator, + eval_every=-1, + **base_kwargs + ): + """ + Creates an instance of the Naive strategy. + + :param model: The model. + :param optimizer: The optimizer to use. + :param criterion: The loss criterion to use. + :param train_mb_size: The train minibatch size. Defaults to 1. + :param train_epochs: The number of training epochs. Defaults to 1. + :param eval_mb_size: The eval minibatch size. Defaults to 1. + :param device: The device to use. Defaults to None (cpu). + :param plugins: Plugins to be added. Defaults to None. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` epochs and at the end of the + learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseTemplate` constructor arguments. 
+ """ + ''' self.generator = Generator + rp = GenerativeReplayPlugin(mem_size, generator=self.generator) + vaep = VAEPlugin() + if plugins is None: + plugins = [rp, vaep] + else: + plugins.append(rp) + plugins.append(vaep) ''' + + super().__init__( + model, + optimizer, + criterion, + train_mb_size=train_mb_size, + train_epochs=train_epochs, + eval_mb_size=eval_mb_size, + device=device, + plugins=plugins, + evaluator=evaluator, + eval_every=eval_every, + **base_kwargs + ) + +class GenerativeReplayForGenerator(SupervisedTemplate): + """Naive finetuning. + + The simplest (and least effective) Continual Learning strategy. Naive just + incrementally fine tunes a single model without employing any method + to contrast the catastrophic forgetting of previous knowledge. + This strategy does not use task identities. + + Naive is easy to set up and its results are commonly used to show the worst + performing baseline. + """ + + def __init__( + self, + model: Module, + optimizer: Optimizer, + criterion=CrossEntropyLoss(), + train_mb_size: int = 1, + train_epochs: int = 1, + eval_mb_size: int = None, + device=None, + plugins: Optional[List[SupervisedPlugin]] = None, #Optional + evaluator: EvaluationPlugin = default_evaluator, + eval_every=-1, + **base_kwargs + ): + """ + Creates an instance of the Naive strategy. + + :param model: The model. + :param optimizer: The optimizer to use. + :param criterion: The loss criterion to use. + :param train_mb_size: The train minibatch size. Defaults to 1. + :param train_epochs: The number of training epochs. Defaults to 1. + :param eval_mb_size: The eval minibatch size. Defaults to 1. + :param device: The device to use. Defaults to None (cpu). + :param plugins: Plugins to be added. Defaults to None. + :param evaluator: (optional) instance of EvaluationPlugin for logging + and metric computations. + :param eval_every: the frequency of the calls to `eval` inside the + training loop. -1 disables the evaluation. 0 means `eval` is called + only at the end of the learning experience. Values >0 mean that + `eval` is called every `eval_every` epochs and at the end of the + learning experience. + :param **base_kwargs: any additional + :class:`~avalanche.training.BaseTemplate` constructor arguments. + """ + self.model = model + plugins.append(GenerativeReplayPlugin(generator=self)) + super().__init__( + model, + optimizer, + criterion, + train_mb_size=train_mb_size, + train_epochs=train_epochs, + eval_mb_size=eval_mb_size, + device=device, + plugins=plugins, + #evaluator=evaluator, + eval_every=eval_every, + **base_kwargs + ) + + def criterion(self): + """Loss function.""" + return self._criterion(self.mb_x ,self.mb_x_recon, self.mean, self.logvar) + class GSS_greedy(SupervisedTemplate): """Experience replay strategy. From 945777f7e2fd6bdc8fa7107ecc4bab1603a28034 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 27 Feb 2022 00:54:59 +0100 Subject: [PATCH 02/50] Update __init__.py and imports. 
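
With these exports in place the new components resolve from the package
roots. A minimal import sketch, for illustration only (the names match the
symbols added in this and the previous patch):

    from avalanche.models import VAE, VAE_loss
    from avalanche.training.plugins import (
        GenerativeReplayPlugin,
        VAEPlugin,
        trainGeneratorPlugin,
    )
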
--- avalanche/models/__init__.py | 1 + avalanche/training/supervised/strategy_wrappers.py | 1 + 2 files changed, 2 insertions(+) diff --git a/avalanche/models/__init__.py b/avalanche/models/__init__.py index 5e9baf6c2..963184e10 100644 --- a/avalanche/models/__init__.py +++ b/avalanche/models/__init__.py @@ -19,3 +19,4 @@ from .base_model import BaseModel from .helper_method import as_multitask from .pnn import PNN +from .generator import VAE, VAE_loss diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 6012b963e..b03d950c4 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -19,6 +19,7 @@ SupervisedPlugin, CWRStarPlugin, ReplayPlugin, + GenerativeReplayPlugin, GDumbPlugin, LwFPlugin, AGEMPlugin, From 4d6f4980abe42bc5c53b7b528ac029a2751480dd Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 2 Mar 2022 13:23:04 +0100 Subject: [PATCH 03/50] PEP8 formatting. --- .../training/supervised/strategy_wrappers.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index b03d950c4..58d642829 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -276,6 +276,7 @@ def __init__( **base_kwargs ) + class GenerativeReplay(SupervisedTemplate): """Naive finetuning. @@ -297,7 +298,7 @@ def __init__( train_epochs: int = 1, eval_mb_size: int = None, device=None, - plugins: Optional[List[SupervisedPlugin]] = None, #Optional + plugins: Optional[List[SupervisedPlugin]] = None, # Optional evaluator: EvaluationPlugin = default_evaluator, eval_every=-1, **base_kwargs @@ -323,14 +324,14 @@ def __init__( :param **base_kwargs: any additional :class:`~avalanche.training.BaseTemplate` constructor arguments. """ - ''' self.generator = Generator + """ self.generator = Generator rp = GenerativeReplayPlugin(mem_size, generator=self.generator) vaep = VAEPlugin() if plugins is None: plugins = [rp, vaep] else: plugins.append(rp) - plugins.append(vaep) ''' + plugins.append(vaep) """ super().__init__( model, @@ -345,7 +346,8 @@ def __init__( eval_every=eval_every, **base_kwargs ) - + + class GenerativeReplayForGenerator(SupervisedTemplate): """Naive finetuning. @@ -367,7 +369,7 @@ def __init__( train_epochs: int = 1, eval_mb_size: int = None, device=None, - plugins: Optional[List[SupervisedPlugin]] = None, #Optional + plugins: Optional[List[SupervisedPlugin]] = None, # Optional evaluator: EvaluationPlugin = default_evaluator, eval_every=-1, **base_kwargs @@ -404,14 +406,15 @@ def __init__( eval_mb_size=eval_mb_size, device=device, plugins=plugins, - #evaluator=evaluator, + # evaluator=evaluator, eval_every=eval_every, **base_kwargs ) def criterion(self): """Loss function.""" - return self._criterion(self.mb_x ,self.mb_x_recon, self.mean, self.logvar) + return self._criterion(self.mb_x, self.mb_x_recon, + self.mean, self.logvar) class GSS_greedy(SupervisedTemplate): From 82d5dc8d07f9ca91cd51179e8256b8e48cd23d13 Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 2 Mar 2022 18:14:53 +0100 Subject: [PATCH 04/50] Incorporate plugins and generator strategy into init of solver strategy. 
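
The intended end-to-end use of the solver strategy then looks roughly as
follows. This is only an illustrative sketch: the benchmark, solver model
and hyper-parameters are placeholders, not part of this patch; the default
VAE generator, its optimizer and the replay/generator-training plugins are
created inside GenerativeReplay.__init__ as shown in the diff below.

    from torch.nn import CrossEntropyLoss
    from torch.optim import Adam

    from avalanche.benchmarks.classic import SplitMNIST
    from avalanche.models import SimpleMLP
    from avalanche.training.supervised.strategy_wrappers import (
        GenerativeReplay,
    )

    benchmark = SplitMNIST(n_experiences=5)
    solver = SimpleMLP(num_classes=10)

    # The strategy builds a default VAE generator and wires in
    # GenerativeReplayPlugin and trainGeneratorPlugin internally.
    strategy = GenerativeReplay(
        solver,
        Adam(solver.parameters(), lr=0.001),
        criterion=CrossEntropyLoss(),
        train_mb_size=100,
        train_epochs=4,
        device="cpu",
    )

    for experience in benchmark.train_stream:
        strategy.train(experience)   # also triggers generator training
        strategy.eval(benchmark.test_stream)
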
--- .../training/plugins/generative_replay.py | 60 +++++++++++-------- .../training/supervised/strategy_wrappers.py | 35 ++++++++--- 2 files changed, 61 insertions(+), 34 deletions(-) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index 77295280e..85245278d 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -3,7 +3,8 @@ from avalanche.core import SupervisedPlugin from avalanche.training.templates.supervised import SupervisedTemplate import torch - + + class GenerativeReplayPlugin(SupervisedPlugin): """ Experience replay plugin. @@ -57,25 +58,28 @@ def before_training_exp(self, strategy: "SupervisedTemplate", Dataloader to build batches containing examples from both memories and the training dataset """ - self.classes_until_now.append(strategy.experience.classes_in_this_experience) - - print("Classes so far: ", self.classes_until_now, len(self.classes_until_now)) + self.classes_until_now.append( + strategy.experience.classes_in_this_experience) + + print("Classes so far: ", self.classes_until_now, + len(self.classes_until_now)) if self.untrained_solver: - # The solver needs to train on the first experience before it can label generated data - # as well as the generator needs to train first. - self.untrained_solver = False - return - #self.classes_until_now = [class_id for exp_classes in self.classes_until_now for class_id in exp_classes] + # The solver needs to train on the first experience before it can label generated data + # as well as the generator needs to train first. + self.untrained_solver = False + return + # self.classes_until_now = [class_id for exp_classes in self.classes_until_now for class_id in exp_classes] # Sample data from generator - memory = self.generator.generate(len(strategy.adapted_dataset)*(len(self.classes_until_now)-1)).to(strategy.device) + memory = self.generator.generate( + len(strategy.adapted_dataset)*(len(self.classes_until_now)-1)).to(strategy.device) # Label the generated data using the current solver model strategy.model.eval() with torch.no_grad(): memory_output = strategy.model(memory).argmax(dim=-1) strategy.model.train() # Create an AvalancheDataset from memory data and labels - memory = AvalancheDataset(torch.utils.data.TensorDataset(memory.detach().cpu(), memory_output.detach().cpu())) - + memory = AvalancheDataset(torch.utils.data.TensorDataset( + memory.detach().cpu(), memory_output.detach().cpu())) batch_size = self.batch_size if batch_size is None: @@ -84,7 +88,7 @@ def before_training_exp(self, strategy: "SupervisedTemplate", batch_size_mem = self.batch_size_mem if batch_size_mem is None: batch_size_mem = strategy.train_mb_size - #Update strategies dataloader by mixing current experience's data with generated data. + # Update strategies dataloader by mixing current experience's data with generated data. strategy.dataloader = ReplayDataLoader( strategy.adapted_dataset, memory, @@ -94,29 +98,33 @@ def before_training_exp(self, strategy: "SupervisedTemplate", num_workers=num_workers, shuffle=shuffle) + class VAEPlugin(SupervisedPlugin): def after_forward( self, strategy, *args, **kwargs ): # Forward call computes the representations in the latent space. 
They are stored at strategy.mb_output and can be used here - strategy.mean, strategy.logvar = strategy.model.calc_mean(strategy.mb_output), strategy.model.calc_logvar(strategy.mb_output) - z = strategy.model.sampling(strategy.mean, strategy.logvar) - strategy.mb_x_recon = strategy.model.decoder(z) - + strategy.mean, strategy.logvar = strategy.model.calc_mean( + strategy.mb_output), strategy.model.calc_logvar(strategy.mb_output) + z = strategy.model.sampling(strategy.mean, strategy.logvar) + strategy.mb_x_recon = strategy.model.decoder(z) + def after_eval_forward( self, strategy, *args, **kwargs ): # Forward call computes the representations in the latent space. They are stored at strategy.mb_output and can be used here - strategy.mean, strategy.logvar = strategy.model.calc_mean(strategy.mb_output), strategy.model.calc_logvar(strategy.mb_output) - z = strategy.model.sampling(strategy.mean, strategy.logvar) - strategy.mb_x_recon = strategy.model.decoder(z) + strategy.mean, strategy.logvar = strategy.model.calc_mean( + strategy.mb_output), strategy.model.calc_logvar(strategy.mb_output) + z = strategy.model.sampling(strategy.mean, strategy.logvar) + strategy.mb_x_recon = strategy.model.decoder(z) + class trainGeneratorPlugin(SupervisedPlugin): def after_training_exp(self, strategy: "SupervisedTemplate", **kwargs): - print("Start training of Generator ... ") - #strategy.generator.train(strategy.dataloader) - strategy.plugins[1].generator_strategy.train(strategy.experience) - # Originally wanted to train directly on strategy.dataloader which already contains generated data - # However training requires an experience which has an attribute dataset with teh entire dataset. - # We there do the sampling step again + print("Start training of Generator ... ") + # strategy.generator.train(strategy.dataloader) + strategy.plugins[1].generator_strategy.train(strategy.experience) + # Originally wanted to train directly on strategy.dataloader which already contains generated data + # However training requires an experience which has an attribute dataset with teh entire dataset. + # We there do the sampling step again diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 58d642829..4a96c1ef6 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -20,6 +20,8 @@ CWRStarPlugin, ReplayPlugin, GenerativeReplayPlugin, + VAEPlugin, + trainGeneratorPlugin, GDumbPlugin, LwFPlugin, AGEMPlugin, @@ -32,6 +34,7 @@ LFLPlugin, ) from avalanche.training.templates.supervised import SupervisedTemplate +from avalanche.models.generator import VAE, VAE_loss class Naive(SupervisedTemplate): @@ -324,14 +327,24 @@ def __init__( :param **base_kwargs: any additional :class:`~avalanche.training.BaseTemplate` constructor arguments. """ - """ self.generator = Generator - rp = GenerativeReplayPlugin(mem_size, generator=self.generator) - vaep = VAEPlugin() - if plugins is None: - plugins = [rp, vaep] - else: - plugins.append(rp) - plugins.append(vaep) """ + + # By default we use a fully-connected VAE. The user should be able to input their own generator. 
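+        # The default below assumes single-channel 28x28 inputs (MNIST-like)
+        # with a 2-dimensional latent space; the generator is wrapped in its
+        # own strategy with its own Adam optimizer further down.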
+ self.generator = VAE((1, 28, 28), nhid=2) + lr = 0.01 + from torch.optim import Adam # this should go to the model file + optimizer_generator = Adam(filter( + lambda p: p.requires_grad, self.generator.parameters()), lr=lr, weight_decay=0.0001) + + gg = GenerativeReplayForGenerator(model=self.generator, optimizer=optimizer_generator, criterion=VAE_loss, train_mb_size=64, train_epochs=10, + eval_mb_size=32, device=device) + rp = GenerativeReplayPlugin(generator=gg) + + tgp = trainGeneratorPlugin() + if plugins is None: + plugins = [tgp, rp] + else: + plugins.append(tgp) + plugins.append(rp) super().__init__( model, @@ -395,6 +408,12 @@ def __init__( :param **base_kwargs: any additional :class:`~avalanche.training.BaseTemplate` constructor arguments. """ + vaep = VAEPlugin() + if plugins is None: + plugins = [vaep] + else: + plugins.append(vaep) + self.model = model plugins.append(GenerativeReplayPlugin(generator=self)) super().__init__( From 2d63e89e5afb50492562f2efe490ef2266bfeb63 Mon Sep 17 00:00:00 2001 From: florian Date: Thu, 3 Mar 2022 16:58:27 +0100 Subject: [PATCH 05/50] Add documentation to VAE model. --- avalanche/models/generator.py | 202 +++++++++++------- .../training/supervised/strategy_wrappers.py | 10 +- 2 files changed, 135 insertions(+), 77 deletions(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index c70821b53..aa8c5d863 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -1,153 +1,205 @@ +################################################################################ +# Copyright (c) 2017. Vincenzo Lomonaco. All rights reserved. # +# Copyrights licensed under the MIT License. # +# See the accompanying LICENSE file for terms. # +# # +# Date: 03-03-2022 # +# Author: Florian Mies # +# Website: https://github.com/travela # +################################################################################ + +""" + +File to place any kind of generative models +and their respective helper functions. + +""" + +from abc import abstractmethod from matplotlib import transforms import torch import torch.nn as nn from collections import OrderedDict from torchvision import transforms +from avalanche.models.base_model import BaseModel + + +class Generator(BaseModel): + """ + A base abstract class for generators + """ + + @abstractmethod + def generate(self, batch_size=None): + """ + Lets the generator sample random samples. + Output is either a single sample or, if provided, + a batch of samples of size "batch_size" + """ + + +########################### +# VARIATIONAL AUTOENCODER # +########################### device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + class Flatten(nn.Module): + ''' + Simple nn.Module to flatten each tensor of a batch of tensors. + ''' + def __init__(self): super(Flatten, self).__init__() + def forward(self, x): batch_size = x.shape[0] return x.view(batch_size, -1) + class MLP(nn.Module): - def __init__(self, hidden_size, last_activation = True): + ''' + Simple nn.Module to create a multi-layer perceptron + with BatchNorm and ReLU activations. + + :param hidden_size: An array indicating the number of neurons in each layer. + :type hidden_size: int[] + :param last_activation: Indicates whether to add BatchNorm and ReLU + after the last layer. 
+ :type last_activation: Boolean + ''' + + def __init__(self, hidden_size, last_activation=True): super(MLP, self).__init__() q = [] for i in range(len(hidden_size)-1): in_dim = hidden_size[i] out_dim = hidden_size[i+1] q.append(("Linear_%d" % i, nn.Linear(in_dim, out_dim))) - if (i < len(hidden_size)-2) or ((i == len(hidden_size) - 2) and (last_activation)): + if (i < len(hidden_size)-2) or ((i == len(hidden_size) - 2) + and (last_activation)): q.append(("BatchNorm_%d" % i, nn.BatchNorm1d(out_dim))) q.append(("ReLU_%d" % i, nn.ReLU(inplace=True))) self.mlp = nn.Sequential(OrderedDict(q)) + def forward(self, x): return self.mlp(x) - + + class Encoder(nn.Module): - def __init__(self, shape, nhid = 16, ncond = 0): + ''' + Encoder part of the VAE, computer the latent represenations of the input. + + :param shape: Shape of the input to the network: (channels, height, width) + :param nhid: Dimension of last hidden layer + ''' + + def __init__(self, shape, nhid=128): super(Encoder, self).__init__() c, h, w = shape ww = ((w-8)//2 - 4)//2 hh = ((h-8)//2 - 4)//2 self.encode = nn.Sequential( Flatten(), - nn.Linear(in_features=28*28, out_features=400), - nn.BatchNorm1d(400), - nn.LeakyReLU(), - MLP([400, 128]) + nn.Linear(in_features=h*w, out_features=400), + nn.BatchNorm1d(400), + nn.LeakyReLU(), + MLP([400, nhid]) ) - def forward(self, x, y = None): + def forward(self, x, y=None): x = self.encode(x) return x - if (y is None): - return self.calc_mean(x), self.calc_logvar(x) - else: - return self.calc_mean(torch.cat((x, y), dim=1)), self.calc_logvar(torch.cat((x, y), dim=1)) + class Decoder(nn.Module): - def __init__(self, shape, nhid = 16, ncond = 0): + ''' + Decoder part of the VAE. Reverses Encoder. + + :param shape: Shape of output: (channels, height, width). + :param nhid: Dimension of input. + ''' + + def __init__(self, shape, nhid=16): super(Decoder, self).__init__() c, w, h = shape self.shape = shape - self.decode = nn.Sequential(MLP([nhid+ncond, 64, 128, 256, c*w*h], last_activation = False), nn.Sigmoid()) + self.decode = nn.Sequential( + MLP([nhid, 64, 128, 256, c*w*h], last_activation=False), + nn.Sigmoid()) self.invTrans = transforms.Compose([ transforms.Normalize((0.1307,), (0.3081,)) ]) - def forward(self, z, y = None): + + def forward(self, z, y=None): c, w, h = self.shape if (y is None): return self.invTrans(self.decode(z).view(-1, c, w, h)) else: - return self.invTrans(self.decode(torch.cat((z, y), dim=1)).view(-1, c, w, h)) - -class Solver(nn.Module): - def __init__(self, vae, nhid = 16): - super().__init__() - self.input_dim = nhid - self.vae = vae + return self.invTrans(self.decode(torch.cat((z, y), dim=1)) + .view(-1, c, w, h)) - def forward(self, x): - self.vae.encoder(x) class VAE(nn.Module): - def __init__(self, shape, nhid = 16, n_classes=10): + ''' + Variational autoencoder module. + + The encoder only computes the latent represenations + and we have then two possible output heads: + One for the usual output distribution and one for classification. + The latter is an extension the conventional VAE and incorporates + a classifier into the network. 
+ More details can be found in: https://arxiv.org/abs/1809.10635 + ''' + + def __init__(self, shape, nhid=16, n_classes=10): super(VAE, self).__init__() self.dim = nhid self.encoder = Encoder(shape, nhid) - self.calc_mean = MLP([128, nhid], last_activation = False) - self.calc_logvar = MLP([128, nhid], last_activation = False) - self.classification = MLP([128, n_classes], last_activation = False) + self.calc_mean = MLP([128, nhid], last_activation=False) + self.calc_logvar = MLP([128, nhid], last_activation=False) + self.classification = MLP([128, n_classes], last_activation=False) self.decoder = Decoder(shape, nhid) - + def sampling(self, mean, logvar): eps = torch.randn(mean.shape).to(device) sigma = 0.5 * torch.exp(logvar) return mean + eps * sigma - + # Orginial forward of VAE. We modify this to tie it in with Avalanche plugin syntax - #def forward(self, x): + # def forward(self, x): # mean, logvar = self.encoder(x) # z = self.sampling(mean, logvar) # return self.decoder(z), mean, logvar def forward(self, x): return self.encoder(x) - - def generate(self, batch_size = None): - z = torch.randn((batch_size, self.dim)).to(device) if batch_size else torch.randn((1, self.dim)).to(device) + + def generate(self, batch_size=None): + z = torch.randn((batch_size, self.dim)).to( + device) if batch_size else torch.randn((1, self.dim)).to(device) res = self.decoder(z) if not batch_size: res = res.squeeze(0) return res -class cVAE(nn.Module): - def __init__(self, shape, nclass, nhid = 16, ncond = 16): - super(cVAE, self).__init__() - self.dim = nhid - self.encoder = Encoder(shape, nhid, ncond = ncond) - self.calc_mean = MLP([128+ncond, nhid], last_activation = False) - self.calc_logvar = MLP([128+ncond, nhid], last_activation = False) - self.decoder = Decoder(shape, nhid, ncond = ncond) - self.label_embedding = nn.Embedding(nclass, ncond) - - def sampling(self, mean, logvar): - eps = torch.randn(mean.shape).to(device) - sigma = 0.5 * torch.exp(logvar) - return mean + eps * sigma - - def forward(self, x, y): - y = self.label_embedding(y) - mean, logvar = self.encoder(x, y) - z = self.sampling(mean, logvar) - return self.decoder(z, y), mean, logvar - - def generate(self, class_idx): - if (type(class_idx) is int): - class_idx = torch.tensor(class_idx) - class_idx = class_idx.to(device) - if (len(class_idx.shape) == 0): - batch_size = None - class_idx = class_idx.unsqueeze(0) - z = torch.randn((1, self.dim)).to(device) - else: - batch_size = class_idx.shape[0] - z = torch.randn((batch_size, self.dim)).to(device) - y = self.label_embedding(class_idx) - res = self.decoder(z, y) - if not batch_size: - res = res.squeeze(0) - return res # Loss functions -BCE_loss = nn.BCELoss(reduction = "sum") -MSE_loss = nn.MSELoss(reduction = "sum") +BCE_loss = nn.BCELoss(reduction="sum") +MSE_loss = nn.MSELoss(reduction="sum") CE_loss = nn.CrossEntropyLoss() + + def VAE_loss(X, X_hat, mean, logvar): + ''' + Loss function of a VAE using mean squared error for reconstruction loss. + This is the criterion for VAE training loop. + + :param X: Original input batch. + :param X_hat: Reconstructed input after subsequent Encoder and Decoder. + :param mean: mean of the VAE output distribution. + :param logvar: logvar of the VAE output distribution. 
+ ''' reconstruction_loss = MSE_loss(X_hat, X) KL_divergence = 0.5 * torch.sum(-1 - logvar + torch.exp(logvar) + mean**2) - return reconstruction_loss + KL_divergence \ No newline at end of file + return reconstruction_loss + KL_divergence diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 4a96c1ef6..5b92dff6c 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -328,8 +328,13 @@ def __init__( :class:`~avalanche.training.BaseTemplate` constructor arguments. """ - # By default we use a fully-connected VAE. The user should be able to input their own generator. - self.generator = VAE((1, 28, 28), nhid=2) + # Check if user inputs a generator model + if 'generator' in base_kwargs: + self.generator = base_kwargs['generator'] + else: + # By default we use a fully-connected VAE. + self.generator = VAE((1, 28, 28), nhid=2) + lr = 0.01 from torch.optim import Adam # this should go to the model file optimizer_generator = Adam(filter( @@ -340,6 +345,7 @@ def __init__( rp = GenerativeReplayPlugin(generator=gg) tgp = trainGeneratorPlugin() + if plugins is None: plugins = [tgp, rp] else: From 2cef28ad0e9d231fe65f3e0033d6b696ea06163c Mon Sep 17 00:00:00 2001 From: florian Date: Thu, 3 Mar 2022 17:14:52 +0100 Subject: [PATCH 06/50] Introduce latent dimension variable for VAE encoder. --- avalanche/models/generator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index aa8c5d863..177f3ecf5 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -92,10 +92,10 @@ class Encoder(nn.Module): Encoder part of the VAE, computer the latent represenations of the input. :param shape: Shape of the input to the network: (channels, height, width) - :param nhid: Dimension of last hidden layer + :param latent_dim: Dimension of last hidden layer ''' - def __init__(self, shape, nhid=128): + def __init__(self, shape, latent_dim=128): super(Encoder, self).__init__() c, h, w = shape ww = ((w-8)//2 - 4)//2 @@ -156,7 +156,7 @@ class VAE(nn.Module): def __init__(self, shape, nhid=16, n_classes=10): super(VAE, self).__init__() self.dim = nhid - self.encoder = Encoder(shape, nhid) + self.encoder = Encoder(shape, latent_dim=128) self.calc_mean = MLP([128, nhid], last_activation=False) self.calc_logvar = MLP([128, nhid], last_activation=False) self.classification = MLP([128, n_classes], last_activation=False) From 8fb9575be8d2bd31eb8ad1d05160caace96e0be5 Mon Sep 17 00:00:00 2001 From: florian Date: Thu, 3 Mar 2022 17:21:25 +0100 Subject: [PATCH 07/50] Fix from last commit. 
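
For reference, the criterion added to generator.py in the previous patches
(VAE_loss) is the usual reconstruction-plus-KL objective. With a diagonal
Gaussian posterior parameterized by mean and logvar and a standard normal
prior, the code computes, in LaTeX notation,

    \mathcal{L}(X, \hat{X}, \mu, \log\sigma^2)
        = \lVert X - \hat{X} \rVert_2^2
        + \frac{1}{2} \sum_j \bigl( \exp(\log\sigma_j^2) + \mu_j^2
                                    - 1 - \log\sigma_j^2 \bigr)

that is, a summed MSE reconstruction term plus the closed-form
KL divergence between N(mu, sigma^2) and N(0, I).
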
--- avalanche/models/generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index 177f3ecf5..15a4bdee2 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -105,7 +105,7 @@ def __init__(self, shape, latent_dim=128): nn.Linear(in_features=h*w, out_features=400), nn.BatchNorm1d(400), nn.LeakyReLU(), - MLP([400, nhid]) + MLP([400, latent_dim]) ) def forward(self, x, y=None): From 6ccff43311215752e4e223169650453715827122 Mon Sep 17 00:00:00 2001 From: florian Date: Thu, 3 Mar 2022 18:10:12 +0100 Subject: [PATCH 08/50] Documentation; extend GR plugin to work without generator initialization; clean up GR template and make it more modular; rename VAETraining --- .../training/plugins/generative_replay.py | 17 ++++- .../training/supervised/strategy_wrappers.py | 73 +++++++++++-------- 2 files changed, 57 insertions(+), 33 deletions(-) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index 85245278d..7ba5f8198 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -37,7 +37,7 @@ class GenerativeReplayPlugin(SupervisedPlugin): in memory """ - def __init__(self, generator, mem_size: int = 200, batch_size: int = None, + def __init__(self, generator=None, mem_size: int = 200, batch_size: int = None, batch_size_mem: int = None, task_balanced_dataloader: bool = False, untrained_solver: bool = True): @@ -47,10 +47,22 @@ def __init__(self, generator, mem_size: int = 200, batch_size: int = None, self.batch_size_mem = batch_size_mem self.task_balanced_dataloader = task_balanced_dataloader self.generator_strategy = generator - self.generator = generator.model + if self.generator_strategy: + self.generator = generator.model + else: + self.generator = None self.untrained_solver = untrained_solver self.classes_until_now = [] + def before_training(self, strategy, *args, **kwargs): + """Called before `train` by the `BaseTemplate`.""" + # If generator is None at this point, + # we can take it that the strategy's model is the generator itself. + # This allows us to use easily use this Plugin for generator strategy + if not self.generator_strategy: + self.generator_strategy = strategy + self.generator = strategy.model + def before_training_exp(self, strategy: "SupervisedTemplate", num_workers: int = 0, shuffle: bool = True, **kwargs): @@ -69,6 +81,7 @@ def before_training_exp(self, strategy: "SupervisedTemplate", self.untrained_solver = False return # self.classes_until_now = [class_id for exp_classes in self.classes_until_now for class_id in exp_classes] + # Sample data from generator memory = self.generator.generate( len(strategy.adapted_dataset)*(len(self.classes_until_now)-1)).to(strategy.device) diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 5b92dff6c..98f13f4c1 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -281,15 +281,17 @@ def __init__( class GenerativeReplay(SupervisedTemplate): - """Naive finetuning. + """Generative Replay Strategy - The simplest (and least effective) Continual Learning strategy. Naive just - incrementally fine tunes a single model without employing any method - to contrast the catastrophic forgetting of previous knowledge. - This strategy does not use task identities. 
+ This implements Deep Generative Replay for a Scholar consisting of a Solver, + and Generator as described in https://arxiv.org/abs/1705.08690. - Naive is easy to set up and its results are commonly used to show the worst - performing baseline. + For the case where the Generator is the model itself that is to be trained, + please simply add the GenerativeReplayPlugin(generator=self) to + your Generator's strategy, similar like in the VAETraining class. + + See GenerativeReplayPlugin for more details. + This strategy does not use task identities. """ def __init__( @@ -307,7 +309,8 @@ def __init__( **base_kwargs ): """ - Creates an instance of the Naive strategy. + Creates an instance of SupervisedTemplate with the appropriate plugins + for generative replay. :param model: The model. :param optimizer: The optimizer to use. @@ -328,21 +331,26 @@ def __init__( :class:`~avalanche.training.BaseTemplate` constructor arguments. """ - # Check if user inputs a generator model + # Check if user inputs a generator model + # (which is already wrapped in a strategy, see VAETraining as example) if 'generator' in base_kwargs: - self.generator = base_kwargs['generator'] + self.generator_strategy = base_kwargs['generator'] else: # By default we use a fully-connected VAE. - self.generator = VAE((1, 28, 28), nhid=2) - - lr = 0.01 - from torch.optim import Adam # this should go to the model file - optimizer_generator = Adam(filter( - lambda p: p.requires_grad, self.generator.parameters()), lr=lr, weight_decay=0.0001) - - gg = GenerativeReplayForGenerator(model=self.generator, optimizer=optimizer_generator, criterion=VAE_loss, train_mb_size=64, train_epochs=10, - eval_mb_size=32, device=device) - rp = GenerativeReplayPlugin(generator=gg) + lr = 0.01 + from torch.optim import Adam # this should go to the model file + optimizer_generator = Adam(filter( + lambda p: p.requires_grad, self.generator.parameters()), lr=lr, + weight_decay=0.0001) + generator = VAE((1, 28, 28), nhid=2) + self.generator_strategy = VAETraining( + model=generator, + optimizer=optimizer_generator, + criterion=VAE_loss, train_mb_size=64, + train_epochs=10, + eval_mb_size=32, device=device) + + rp = GenerativeReplayPlugin(generator=self.generator_strategy) tgp = trainGeneratorPlugin() @@ -367,16 +375,17 @@ def __init__( ) -class GenerativeReplayForGenerator(SupervisedTemplate): - """Naive finetuning. +class VAETraining(SupervisedTemplate): + """VAETraining class - The simplest (and least effective) Continual Learning strategy. Naive just - incrementally fine tunes a single model without employing any method - to contrast the catastrophic forgetting of previous knowledge. - This strategy does not use task identities. + This is the training strategy for the VAE model + found in the models directory. + The actual training loop is modified in the VAEPlugin, + go there for more details. - Naive is easy to set up and its results are commonly used to show the worst - performing baseline. + This class is meant to add this plugin and + to overwrite the criterion function in order to pass all necessary variables + to the VAE loss function. 
""" def __init__( @@ -420,8 +429,10 @@ def __init__( else: plugins.append(vaep) - self.model = model - plugins.append(GenerativeReplayPlugin(generator=self)) + if base_kwargs['generative_replay']: + self.model = model + plugins.append(GenerativeReplayPlugin(generator=self)) + super().__init__( model, optimizer, @@ -431,7 +442,7 @@ def __init__( eval_mb_size=eval_mb_size, device=device, plugins=plugins, - # evaluator=evaluator, + evaluator=evaluator, eval_every=eval_every, **base_kwargs ) From bf2c20e485cbddde894b847da856b4c5a95e33dc Mon Sep 17 00:00:00 2001 From: florian Date: Thu, 3 Mar 2022 18:18:29 +0100 Subject: [PATCH 09/50] Fix introduced bug. --- avalanche/training/supervised/strategy_wrappers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 98f13f4c1..6fb5e204a 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -337,12 +337,13 @@ def __init__( self.generator_strategy = base_kwargs['generator'] else: # By default we use a fully-connected VAE. + generator = VAE((1, 28, 28), nhid=2) lr = 0.01 from torch.optim import Adam # this should go to the model file optimizer_generator = Adam(filter( - lambda p: p.requires_grad, self.generator.parameters()), lr=lr, + lambda p: p.requires_grad, generator.parameters()), lr=lr, weight_decay=0.0001) - generator = VAE((1, 28, 28), nhid=2) + self.generator_strategy = VAETraining( model=generator, optimizer=optimizer_generator, From bc8d7ee7ceb99072add345adf7ff221b665ef943 Mon Sep 17 00:00:00 2001 From: florian Date: Thu, 3 Mar 2022 18:26:11 +0100 Subject: [PATCH 10/50] Add boolean to VAETraining call. --- avalanche/training/supervised/strategy_wrappers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 6fb5e204a..7842563f7 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -349,7 +349,7 @@ def __init__( optimizer=optimizer_generator, criterion=VAE_loss, train_mb_size=64, train_epochs=10, - eval_mb_size=32, device=device) + eval_mb_size=32, device=device, generative_replay=True) rp = GenerativeReplayPlugin(generator=self.generator_strategy) @@ -430,7 +430,8 @@ def __init__( else: plugins.append(vaep) - if base_kwargs['generative_replay']: + if ('generative_replay' in base_kwargs) and base_kwargs['generative_replay']: + print("Train VAE with generative replay.") self.model = model plugins.append(GenerativeReplayPlugin(generator=self)) From baf80e443988a5f9bd94e1559106216f53d52bea Mon Sep 17 00:00:00 2001 From: florian Date: Thu, 3 Mar 2022 18:36:11 +0100 Subject: [PATCH 11/50] Fix 2.0 --- avalanche/training/supervised/strategy_wrappers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 7842563f7..65f645ba9 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -401,6 +401,7 @@ def __init__( plugins: Optional[List[SupervisedPlugin]] = None, # Optional evaluator: EvaluationPlugin = default_evaluator, eval_every=-1, + generative_replay=False, **base_kwargs ): """ @@ -430,7 +431,7 @@ def __init__( else: plugins.append(vaep) - if ('generative_replay' in 
base_kwargs) and base_kwargs['generative_replay']: + if generative_replay: print("Train VAE with generative replay.") self.model = model plugins.append(GenerativeReplayPlugin(generator=self)) From 7ba6f96a42ac1074d4de425602a80ce808eafd4e Mon Sep 17 00:00:00 2001 From: florian Date: Thu, 3 Mar 2022 18:58:11 +0100 Subject: [PATCH 12/50] Try to move the GenerativeReplayPlugin call outside of the VAETraining class. --- avalanche/training/supervised/strategy_wrappers.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 65f645ba9..686c54829 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -349,7 +349,9 @@ def __init__( optimizer=optimizer_generator, criterion=VAE_loss, train_mb_size=64, train_epochs=10, - eval_mb_size=32, device=device, generative_replay=True) + eval_mb_size=32, device=device, + plugins=[GenerativeReplayPlugin()], + generative_replay=False) rp = GenerativeReplayPlugin(generator=self.generator_strategy) @@ -431,10 +433,10 @@ def __init__( else: plugins.append(vaep) - if generative_replay: - print("Train VAE with generative replay.") - self.model = model - plugins.append(GenerativeReplayPlugin(generator=self)) + # if generative_replay: + # print("Train VAE with generative replay.") + # self.model = model + # plugins.append(GenerativeReplayPlugin(generator=self)) super().__init__( model, From 5c9b5232c0ab4084755a753042e09bc99cbb8961 Mon Sep 17 00:00:00 2001 From: florian Date: Thu, 3 Mar 2022 19:10:13 +0100 Subject: [PATCH 13/50] Remove redundant code. VAETraining can now be trained alone, with or without GR simply by adding the Plugin. --- avalanche/training/supervised/strategy_wrappers.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 686c54829..b6f404390 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -287,8 +287,8 @@ class GenerativeReplay(SupervisedTemplate): and Generator as described in https://arxiv.org/abs/1705.08690. For the case where the Generator is the model itself that is to be trained, - please simply add the GenerativeReplayPlugin(generator=self) to - your Generator's strategy, similar like in the VAETraining class. + please simply add the GenerativeReplayPlugin() when instantiating + your Generator's strategy. See GenerativeReplayPlugin for more details. This strategy does not use task identities. 
@@ -350,8 +350,7 @@ def __init__( criterion=VAE_loss, train_mb_size=64, train_epochs=10, eval_mb_size=32, device=device, - plugins=[GenerativeReplayPlugin()], - generative_replay=False) + plugins=[GenerativeReplayPlugin()]) rp = GenerativeReplayPlugin(generator=self.generator_strategy) @@ -403,7 +402,6 @@ def __init__( plugins: Optional[List[SupervisedPlugin]] = None, # Optional evaluator: EvaluationPlugin = default_evaluator, eval_every=-1, - generative_replay=False, **base_kwargs ): """ @@ -433,11 +431,6 @@ def __init__( else: plugins.append(vaep) - # if generative_replay: - # print("Train VAE with generative replay.") - # self.model = model - # plugins.append(GenerativeReplayPlugin(generator=self)) - super().__init__( model, optimizer, From 84730848c6176107d2b9d4d30fadac8ad085e201 Mon Sep 17 00:00:00 2001 From: florian Date: Sat, 5 Mar 2022 16:57:34 +0100 Subject: [PATCH 14/50] Document all GR plugins. --- avalanche/models/generator.py | 6 +- .../training/plugins/generative_replay.py | 83 +++++++++++-------- .../training/supervised/strategy_wrappers.py | 35 +++++--- 3 files changed, 75 insertions(+), 49 deletions(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index 15a4bdee2..e68ed3a3f 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -3,7 +3,7 @@ # Copyrights licensed under the MIT License. # # See the accompanying LICENSE file for terms. # # # -# Date: 03-03-2022 # +# Date: 03-03-2022 # # Author: Florian Mies # # Website: https://github.com/travela # ################################################################################ @@ -167,7 +167,9 @@ def sampling(self, mean, logvar): sigma = 0.5 * torch.exp(logvar) return mean + eps * sigma - # Orginial forward of VAE. We modify this to tie it in with Avalanche plugin syntax + # Orginial forward of VAE. + # We modify this to allow for Replay-through-Feedback, + # see VAEPlugin for details. # def forward(self, x): # mean, logvar = self.encoder(x) # z = self.sampling(mean, logvar) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index 7ba5f8198..160b92c36 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -7,21 +7,15 @@ class GenerativeReplayPlugin(SupervisedPlugin): """ - Experience replay plugin. + Experience generative replay plugin. - Handles an external memory filled with randomly selected - patterns and implementing `before_training_exp` and `after_training_exp` - callbacks. - The `before_training_exp` callback is implemented in order to use the - dataloader that creates mini-batches with examples from both training - data and external memory. The examples in the mini-batch is balanced - such that there are the same number of examples for each experience. + Updates the Dataloader of a strategy before training an experience + by sampling a generator model and weaving the replay data into + the original training data. - The `after_training_exp` callback is implemented in order to add new - patterns to the external memory. - - The :mem_size: attribute controls the total number of patterns to be stored - in the external memory. + The examples in the created mini-batch contain one part of the original data + and one part of generative data for each class + that has been encountered so far. :param batch_size: the size of the data batch. If set to `None`, it will be set equal to the strategy's batch size. 
@@ -33,11 +27,14 @@ class GenerativeReplayPlugin(SupervisedPlugin): :param task_balanced_dataloader: if True, buffer data loaders will be task-balanced, otherwise it will create a single dataloader for the buffer samples. - :param storage_policy: The policy that controls how to add new exemplars - in memory + :param untrained_solver: if True we assume this is the beginning of + a continual learning task and add replay data only from the second + experience onwards, otherwise we sample and add generative replay data + before training the first experience. Default to True. """ - def __init__(self, generator=None, mem_size: int = 200, batch_size: int = None, + def __init__(self, generator=None, mem_size: int = 200, + batch_size: int = None, batch_size_mem: int = None, task_balanced_dataloader: bool = False, untrained_solver: bool = True): @@ -55,10 +52,10 @@ def __init__(self, generator=None, mem_size: int = 200, batch_size: int = None, self.classes_until_now = [] def before_training(self, strategy, *args, **kwargs): - """Called before `train` by the `BaseTemplate`.""" - # If generator is None at this point, - # we can take it that the strategy's model is the generator itself. - # This allows us to use easily use this Plugin for generator strategy + """Checks whether we are using a user defined external generator + or we use the strategy's model as the generator. + If the generator is None after initialization + we assume that strategy.model is the generator.""" if not self.generator_strategy: self.generator_strategy = strategy self.generator = strategy.model @@ -67,24 +64,22 @@ def before_training_exp(self, strategy: "SupervisedTemplate", num_workers: int = 0, shuffle: bool = True, **kwargs): """ - Dataloader to build batches containing examples from both memories and - the training dataset + ReplayDataloader to build batches containing examples from both, + data sampled from the generator and the training dataset. """ self.classes_until_now.append( strategy.experience.classes_in_this_experience) - print("Classes so far: ", self.classes_until_now, - len(self.classes_until_now)) if self.untrained_solver: - # The solver needs to train on the first experience before it can label generated data - # as well as the generator needs to train first. + # The solver needs to be trained to label generated data + # the generator needs to be trained before we can sample. self.untrained_solver = False return - # self.classes_until_now = [class_id for exp_classes in self.classes_until_now for class_id in exp_classes] # Sample data from generator memory = self.generator.generate( - len(strategy.adapted_dataset)*(len(self.classes_until_now)-1)).to(strategy.device) + len(strategy.adapted_dataset) * + (len(self.classes_until_now)-1)).to(strategy.device) # Label the generated data using the current solver model strategy.model.eval() with torch.no_grad(): @@ -101,7 +96,8 @@ def before_training_exp(self, strategy: "SupervisedTemplate", batch_size_mem = self.batch_size_mem if batch_size_mem is None: batch_size_mem = strategy.train_mb_size - # Update strategies dataloader by mixing current experience's data with generated data. + # Update strategy's dataloader by interleaving + # current experience's data with generated data. strategy.dataloader = ReplayDataLoader( strategy.adapted_dataset, memory, @@ -113,11 +109,19 @@ def before_training_exp(self, strategy: "SupervisedTemplate", class VAEPlugin(SupervisedPlugin): + """ + VAEPlugin which facilitates the conventional training of the models.VAE. 
+ + The VAE's forward call computes the representations in the latent space, + 'after_forward' computes the remaining steps of the classic VAE forward. + """ def after_forward( self, strategy, *args, **kwargs ): - # Forward call computes the representations in the latent space. They are stored at strategy.mb_output and can be used here + """ + Compute the reconstruction of the input and posterior distribution. + """ strategy.mean, strategy.logvar = strategy.model.calc_mean( strategy.mb_output), strategy.model.calc_logvar(strategy.mb_output) z = strategy.model.sampling(strategy.mean, strategy.logvar) @@ -126,7 +130,9 @@ def after_forward( def after_eval_forward( self, strategy, *args, **kwargs ): - # Forward call computes the representations in the latent space. They are stored at strategy.mb_output and can be used here + """ + Compute the reconstruction of the input and posterior distribution. + """ strategy.mean, strategy.logvar = strategy.model.calc_mean( strategy.mb_output), strategy.model.calc_logvar(strategy.mb_output) z = strategy.model.sampling(strategy.mean, strategy.logvar) @@ -134,10 +140,15 @@ def after_eval_forward( class trainGeneratorPlugin(SupervisedPlugin): + """ + trainGeneratorPlugin makes sure that after each experience of training + the solver of a scholar model, we also train the generator on the data + of the current experience. + """ + def after_training_exp(self, strategy: "SupervisedTemplate", **kwargs): - print("Start training of Generator ... ") - # strategy.generator.train(strategy.dataloader) + """ + The training method expects an Experience object + with a 'dataset' parameter. + """ strategy.plugins[1].generator_strategy.train(strategy.experience) - # Originally wanted to train directly on strategy.dataloader which already contains generated data - # However training requires an experience which has an attribute dataset with teh entire dataset. - # We there do the sampling step again diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index b6f404390..b938d053c 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -33,6 +33,7 @@ GSS_greedyPlugin, LFLPlugin, ) +from avalanche.training.templates.base import BaseTemplate from avalanche.training.templates.supervised import SupervisedTemplate from avalanche.models.generator import VAE, VAE_loss @@ -283,9 +284,14 @@ def __init__( class GenerativeReplay(SupervisedTemplate): """Generative Replay Strategy - This implements Deep Generative Replay for a Scholar consisting of a Solver, + This implements Deep Generative Replay for a Scholar consisting of a Solver and Generator as described in https://arxiv.org/abs/1705.08690. + The model parameter should contain the solver. As an optional input + a generator can be wrapped in a trainable strategy + and passed through generator_strategy. + By default a simple VAE will be used as generator. + For the case where the Generator is the model itself that is to be trained, please simply add the GenerativeReplayPlugin() when instantiating your Generator's strategy. 
@@ -303,16 +309,17 @@ def __init__( train_epochs: int = 1, eval_mb_size: int = None, device=None, - plugins: Optional[List[SupervisedPlugin]] = None, # Optional + plugins: Optional[List[SupervisedPlugin]] = None, evaluator: EvaluationPlugin = default_evaluator, eval_every=-1, + generator_strategy: BaseTemplate = None, **base_kwargs ): """ - Creates an instance of SupervisedTemplate with the appropriate plugins - for generative replay. + Creates an instance of Generative Replay Strategy + for a solver-generator pair. - :param model: The model. + :param model: The solver model. :param optimizer: The optimizer to use. :param criterion: The loss criterion to use. :param train_mb_size: The train minibatch size. Defaults to 1. @@ -327,23 +334,29 @@ def __init__( only at the end of the learning experience. Values >0 mean that `eval` is called every `eval_every` epochs and at the end of the learning experience. + :param generator_strategy: A trainable strategy with a generative model, + which employs GenerativeReplayPlugin. Defaults to None. :param **base_kwargs: any additional :class:`~avalanche.training.BaseTemplate` constructor arguments. """ # Check if user inputs a generator model - # (which is already wrapped in a strategy, see VAETraining as example) - if 'generator' in base_kwargs: - self.generator_strategy = base_kwargs['generator'] + # (which is wrapped in a strategy that can be trained and + # uses the GenerativeReplayPlugin; + # see 'VAETraining" as an example below.) + if generator_strategy is not None: + self.generator_strategy = generator_strategy else: - # By default we use a fully-connected VAE. + # By default we use a fully-connected VAE as the generator. + # model: generator = VAE((1, 28, 28), nhid=2) + # optimzer: lr = 0.01 - from torch.optim import Adam # this should go to the model file + from torch.optim import Adam optimizer_generator = Adam(filter( lambda p: p.requires_grad, generator.parameters()), lr=lr, weight_decay=0.0001) - + # strategy (with plugin): self.generator_strategy = VAETraining( model=generator, optimizer=optimizer_generator, From 2d677bd7aa9d30971e16569dcc04cde19066e637 Mon Sep 17 00:00:00 2001 From: florian Date: Sat, 5 Mar 2022 17:04:07 +0100 Subject: [PATCH 15/50] Module header. --- .../training/plugins/generative_replay.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index 160b92c36..2915bb0bf 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -1,3 +1,19 @@ +################################################################################ +# Copyright (c) 2017. Vincenzo Lomonaco. All rights reserved. # +# Copyrights licensed under the MIT License. # +# See the accompanying LICENSE file for terms. # +# # +# Date: 05-03-2022 # +# Author: Florian Mies # +# Website: https://github.com/travela # +################################################################################ + +""" + +All plugins related to Generative Replay. + +""" + from avalanche.benchmarks.utils.data_loader import ReplayDataLoader from avalanche.benchmarks.utils import AvalancheDataset from avalanche.core import SupervisedPlugin @@ -38,6 +54,9 @@ def __init__(self, generator=None, mem_size: int = 200, batch_size_mem: int = None, task_balanced_dataloader: bool = False, untrained_solver: bool = True): + ''' + Init. 
+ ''' super().__init__() self.mem_size = mem_size self.batch_size = batch_size From 0b96f3895dfba0f3458b24a2aafc719468b11ca5 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 6 Mar 2022 14:41:52 +0100 Subject: [PATCH 16/50] Make VAE more general: any input shape is allowed. --- avalanche/models/generator.py | 37 +++++++++++++------ .../training/plugins/generative_replay.py | 2 +- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index e68ed3a3f..a92309013 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -1,5 +1,5 @@ ################################################################################ -# Copyright (c) 2017. Vincenzo Lomonaco. All rights reserved. # +# Copyright (c) 2021 ContinualAI. # # Copyrights licensed under the MIT License. # # See the accompanying LICENSE file for terms. # # # @@ -97,12 +97,10 @@ class Encoder(nn.Module): def __init__(self, shape, latent_dim=128): super(Encoder, self).__init__() - c, h, w = shape - ww = ((w-8)//2 - 4)//2 - hh = ((h-8)//2 - 4)//2 + flattened_size = torch.Size(shape).numel() self.encode = nn.Sequential( Flatten(), - nn.Linear(in_features=h*w, out_features=400), + nn.Linear(in_features=flattened_size, out_features=400), nn.BatchNorm1d(400), nn.LeakyReLU(), MLP([400, latent_dim]) @@ -123,27 +121,27 @@ class Decoder(nn.Module): def __init__(self, shape, nhid=16): super(Decoder, self).__init__() - c, w, h = shape + flattened_size = torch.Size(shape).numel() self.shape = shape self.decode = nn.Sequential( - MLP([nhid, 64, 128, 256, c*w*h], last_activation=False), + MLP([nhid, 64, 128, 256, flattened_size], last_activation=False), nn.Sigmoid()) self.invTrans = transforms.Compose([ transforms.Normalize((0.1307,), (0.3081,)) ]) def forward(self, z, y=None): - c, w, h = self.shape if (y is None): - return self.invTrans(self.decode(z).view(-1, c, w, h)) + return self.invTrans(self.decode(z).view(-1, *self.shape)) else: return self.invTrans(self.decode(torch.cat((z, y), dim=1)) - .view(-1, c, w, h)) + .view(-1, *self.shape)) class VAE(nn.Module): ''' - Variational autoencoder module. + Variational autoencoder module: + fully-connected and suited for any input shape and type. The encoder only computes the latent represenations and we have then two possible output heads: @@ -154,6 +152,12 @@ class VAE(nn.Module): ''' def __init__(self, shape, nhid=16, n_classes=10): + """ + :param shape: Shape of each input sample + :param nhid: Dimension of latent space of Encoder. + :param n_classes: Number of classes - + defines classification head's dimension + """ super(VAE, self).__init__() self.dim = nhid self.encoder = Encoder(shape, latent_dim=128) @@ -163,6 +167,9 @@ def __init__(self, shape, nhid=16, n_classes=10): self.decoder = Decoder(shape, nhid) def sampling(self, mean, logvar): + """ + VAE 'reparametrization trick' + """ eps = torch.randn(mean.shape).to(device) sigma = 0.5 * torch.exp(logvar) return mean + eps * sigma @@ -175,9 +182,17 @@ def sampling(self, mean, logvar): # z = self.sampling(mean, logvar) # return self.decoder(z), mean, logvar def forward(self, x): + """ + Forward. Computes representations of encoder. + """ return self.encoder(x) def generate(self, batch_size=None): + """ + Generate random samples. + Output is either a single sample if batch_size=None, + else it is a batch of samples of size "batch_size". 
+ """ z = torch.randn((batch_size, self.dim)).to( device) if batch_size else torch.randn((1, self.dim)).to(device) res = self.decoder(z) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index 2915bb0bf..b414c98a7 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -1,5 +1,5 @@ ################################################################################ -# Copyright (c) 2017. Vincenzo Lomonaco. All rights reserved. # +# Copyright (c) 2021 ContinualAI. # # Copyrights licensed under the MIT License. # # See the accompanying LICENSE file for terms. # # # From 1af464ee3f892206363edfdbce6a1f140c905d4c Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 6 Mar 2022 18:22:57 +0100 Subject: [PATCH 17/50] Removing reliance on VAEPlugin. --- avalanche/models/generator.py | 51 ++++++++++++------- avalanche/training/plugins/__init__.py | 3 +- .../training/supervised/strategy_wrappers.py | 10 +--- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index a92309013..efc7bd617 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -138,7 +138,7 @@ def forward(self, z, y=None): .view(-1, *self.shape)) -class VAE(nn.Module): +class VAE(Generator, nn.Module): ''' Variational autoencoder module: fully-connected and suited for any input shape and type. @@ -166,24 +166,9 @@ def __init__(self, shape, nhid=16, n_classes=10): self.classification = MLP([128, n_classes], last_activation=False) self.decoder = Decoder(shape, nhid) - def sampling(self, mean, logvar): - """ - VAE 'reparametrization trick' - """ - eps = torch.randn(mean.shape).to(device) - sigma = 0.5 * torch.exp(logvar) - return mean + eps * sigma - - # Orginial forward of VAE. - # We modify this to allow for Replay-through-Feedback, - # see VAEPlugin for details. - # def forward(self, x): - # mean, logvar = self.encoder(x) - # z = self.sampling(mean, logvar) - # return self.decoder(z), mean, logvar - def forward(self, x): + def get_features(self, x): """ - Forward. Computes representations of encoder. + Get features for encoder part given input """ return self.encoder(x) @@ -200,6 +185,33 @@ def generate(self, batch_size=None): res = res.squeeze(0) return res + def sampling(self, mean, logvar): + """ + VAE 'reparametrization trick' + """ + eps = torch.randn(mean.shape).to(device) + sigma = 0.5 * torch.exp(logvar) + return mean + eps * sigma + + # Orginial forward of VAE. + # We modify this to allow for Replay-through-Feedback, + # see VAEPlugin for details. + def forward(self, x): + """ + Forward. + """ + represntations = self.encoder(x) + mean, logvar = self.calc_mean( + represntations), self.calc_logvar(represntations) + z = self.sampling(mean, logvar) + return self.decoder(z), mean, logvar + +# def forward(self, x): +# """ +# Forward. Computes representations of encoder. +# """ +# return self.encoder(x) + # Loss functions BCE_loss = nn.BCELoss(reduction="sum") @@ -207,7 +219,7 @@ def generate(self, batch_size=None): CE_loss = nn.CrossEntropyLoss() -def VAE_loss(X, X_hat, mean, logvar): +def VAE_loss(X, forward_output): ''' Loss function of a VAE using mean squared error for reconstruction loss. This is the criterion for VAE training loop. @@ -217,6 +229,7 @@ def VAE_loss(X, X_hat, mean, logvar): :param mean: mean of the VAE output distribution. :param logvar: logvar of the VAE output distribution. 
''' + X_hat, mean, logvar = forward_output reconstruction_loss = MSE_loss(X_hat, X) KL_divergence = 0.5 * torch.sum(-1 - logvar + torch.exp(logvar) + mean**2) return reconstruction_loss + KL_divergence diff --git a/avalanche/training/plugins/__init__.py b/avalanche/training/plugins/__init__.py index f69d7256a..580aa5e47 100644 --- a/avalanche/training/plugins/__init__.py +++ b/avalanche/training/plugins/__init__.py @@ -13,4 +13,5 @@ from .lfl import LFLPlugin from .early_stopping import EarlyStoppingPlugin from .lr_scheduling import LRSchedulerPlugin -from .generative_replay import GenerativeReplayPlugin, VAEPlugin, trainGeneratorPlugin +from .generative_replay import GenerativeReplayPlugin, VAEPlugin, \ + trainGeneratorPlugin diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index b938d053c..a16788012 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -412,7 +412,7 @@ def __init__( train_epochs: int = 1, eval_mb_size: int = None, device=None, - plugins: Optional[List[SupervisedPlugin]] = None, # Optional + plugins: Optional[List[SupervisedPlugin]] = None, evaluator: EvaluationPlugin = default_evaluator, eval_every=-1, **base_kwargs @@ -438,11 +438,6 @@ def __init__( :param **base_kwargs: any additional :class:`~avalanche.training.BaseTemplate` constructor arguments. """ - vaep = VAEPlugin() - if plugins is None: - plugins = [vaep] - else: - plugins.append(vaep) super().__init__( model, @@ -460,8 +455,7 @@ def __init__( def criterion(self): """Loss function.""" - return self._criterion(self.mb_x, self.mb_x_recon, - self.mean, self.logvar) + return self._criterion(self.mb_x, self.mb_output) class GSS_greedy(SupervisedTemplate): From 5edd13ca7bbb74f327ab520e47a06506e208c4a2 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 6 Mar 2022 18:31:42 +0100 Subject: [PATCH 18/50] Set default evaluator for VAE to None. --- avalanche/training/supervised/strategy_wrappers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index a16788012..bd423c0d0 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -413,7 +413,7 @@ def __init__( eval_mb_size: int = None, device=None, plugins: Optional[List[SupervisedPlugin]] = None, - evaluator: EvaluationPlugin = default_evaluator, + evaluator: EvaluationPlugin = None, eval_every=-1, **base_kwargs ): From 12bcc4e42c85e1fd2450d55dba470a81644a7340 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 6 Mar 2022 18:52:29 +0100 Subject: [PATCH 19/50] Add interactive logger to VAETraining. 
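With this change VAETraining reports its (unsupervised) training loss through an
InteractiveLogger-backed EvaluationPlugin by default. A rough usage sketch, assuming
only the API introduced so far in this series; the SplitMNIST benchmark and the
hyper-parameters are illustrative, not prescriptive, and no replay plugin is involved,
so this simply trains the VAE experience by experience while the logger prints the loss:

    import torch
    from avalanche.benchmarks import SplitMNIST
    from avalanche.models import VAE
    from avalanche.training.supervised import VAETraining

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    scenario = SplitMNIST(n_experiences=10, seed=1234)

    model = VAE((1, 28, 28), nhid=2)
    # No criterion or evaluator is passed: VAE_loss and the InteractiveLogger-backed
    # EvaluationPlugin added in this patch are used as defaults.
    strategy = VAETraining(
        model,
        torch.optim.Adam(model.parameters(), lr=0.001),
        train_mb_size=100,
        train_epochs=4,
        device=device,
    )
    for experience in scenario.train_stream:
        strategy.train(experience)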
--- avalanche/training/supervised/strategy_wrappers.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index bd423c0d0..25c77dfc6 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -20,7 +20,6 @@ CWRStarPlugin, ReplayPlugin, GenerativeReplayPlugin, - VAEPlugin, trainGeneratorPlugin, GDumbPlugin, LwFPlugin, @@ -36,6 +35,7 @@ from avalanche.training.templates.base import BaseTemplate from avalanche.training.templates.supervised import SupervisedTemplate from avalanche.models.generator import VAE, VAE_loss +from avalanche.logging import InteractiveLogger class Naive(SupervisedTemplate): @@ -407,13 +407,16 @@ def __init__( self, model: Module, optimizer: Optimizer, - criterion=CrossEntropyLoss(), + criterion=VAE_loss, train_mb_size: int = 1, train_epochs: int = 1, eval_mb_size: int = None, device=None, plugins: Optional[List[SupervisedPlugin]] = None, - evaluator: EvaluationPlugin = None, + evaluator: EvaluationPlugin = EvaluationPlugin( + loggers=[InteractiveLogger()], + suppress_warnings=True, + ), eval_every=-1, **base_kwargs ): From 9de29637cddddb0c42524b7ff1cefe9a1fb6d9b2 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 6 Mar 2022 19:20:22 +0100 Subject: [PATCH 20/50] Bug fix: Generator doesn't have to label its replay data; Remove VAEPlugin. --- avalanche/models/generator.py | 11 +----- avalanche/training/plugins/__init__.py | 3 +- .../training/plugins/generative_replay.py | 37 ++++++++----------- .../training/supervised/strategy_wrappers.py | 13 ++++--- 4 files changed, 24 insertions(+), 40 deletions(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index efc7bd617..476c9bee5 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -168,7 +168,7 @@ def __init__(self, shape, nhid=16, n_classes=10): def get_features(self, x): """ - Get features for encoder part given input + Get features for encoder part given input x """ return self.encoder(x) @@ -193,9 +193,6 @@ def sampling(self, mean, logvar): sigma = 0.5 * torch.exp(logvar) return mean + eps * sigma - # Orginial forward of VAE. - # We modify this to allow for Replay-through-Feedback, - # see VAEPlugin for details. def forward(self, x): """ Forward. @@ -206,12 +203,6 @@ def forward(self, x): z = self.sampling(mean, logvar) return self.decoder(z), mean, logvar -# def forward(self, x): -# """ -# Forward. Computes representations of encoder. 
-# """ -# return self.encoder(x) - # Loss functions BCE_loss = nn.BCELoss(reduction="sum") diff --git a/avalanche/training/plugins/__init__.py b/avalanche/training/plugins/__init__.py index 580aa5e47..02d2e3f92 100644 --- a/avalanche/training/plugins/__init__.py +++ b/avalanche/training/plugins/__init__.py @@ -13,5 +13,4 @@ from .lfl import LFLPlugin from .early_stopping import EarlyStoppingPlugin from .lr_scheduling import LRSchedulerPlugin -from .generative_replay import GenerativeReplayPlugin, VAEPlugin, \ - trainGeneratorPlugin +from .generative_replay import GenerativeReplayPlugin, trainGeneratorPlugin diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index b414c98a7..6a56f6f55 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -68,6 +68,7 @@ def __init__(self, generator=None, mem_size: int = 200, else: self.generator = None self.untrained_solver = untrained_solver + self.model_is_generator = False self.classes_until_now = [] def before_training(self, strategy, *args, **kwargs): @@ -78,6 +79,7 @@ def before_training(self, strategy, *args, **kwargs): if not self.generator_strategy: self.generator_strategy = strategy self.generator = strategy.model + self.model_is_generator = True def before_training_exp(self, strategy: "SupervisedTemplate", num_workers: int = 0, shuffle: bool = True, @@ -99,11 +101,16 @@ def before_training_exp(self, strategy: "SupervisedTemplate", memory = self.generator.generate( len(strategy.adapted_dataset) * (len(self.classes_until_now)-1)).to(strategy.device) - # Label the generated data using the current solver model - strategy.model.eval() - with torch.no_grad(): - memory_output = strategy.model(memory).argmax(dim=-1) - strategy.model.train() + # Label the generated data using the current solver model, + # in case there is a solver + if not self.model_is_generator: + strategy.model.eval() + with torch.no_grad(): + memory_output = strategy.model(memory).argmax(dim=-1) + strategy.model.train() + else: + # Mock labels: + memory_output = torch.zeros(memory.shape[0]) # Create an AvalancheDataset from memory data and labels memory = AvalancheDataset(torch.utils.data.TensorDataset( memory.detach().cpu(), memory_output.detach().cpu())) @@ -127,9 +134,9 @@ def before_training_exp(self, strategy: "SupervisedTemplate", shuffle=shuffle) -class VAEPlugin(SupervisedPlugin): +class RtFPlugin(SupervisedPlugin): """ - VAEPlugin which facilitates the conventional training of the models.VAE. + RtFPlugin which facilitates the conventional training of the models.VAE. The VAE's forward call computes the representations in the latent space, 'after_forward' computes the remaining steps of the classic VAE forward. @@ -141,21 +148,7 @@ def after_forward( """ Compute the reconstruction of the input and posterior distribution. """ - strategy.mean, strategy.logvar = strategy.model.calc_mean( - strategy.mb_output), strategy.model.calc_logvar(strategy.mb_output) - z = strategy.model.sampling(strategy.mean, strategy.logvar) - strategy.mb_x_recon = strategy.model.decoder(z) - - def after_eval_forward( - self, strategy, *args, **kwargs - ): - """ - Compute the reconstruction of the input and posterior distribution. 
- """ - strategy.mean, strategy.logvar = strategy.model.calc_mean( - strategy.mb_output), strategy.model.calc_logvar(strategy.mb_output) - z = strategy.model.sampling(strategy.mean, strategy.logvar) - strategy.mb_x_recon = strategy.model.decoder(z) + print("Replay-through-Feedback to be implemented soon.") class trainGeneratorPlugin(SupervisedPlugin): diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 25c77dfc6..0ebb642c7 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -395,12 +395,12 @@ class VAETraining(SupervisedTemplate): This is the training strategy for the VAE model found in the models directory. - The actual training loop is modified in the VAEPlugin, - go there for more details. + We make use of the SupervisedTemplate, even though technically this is not a + supervised training. However, this reduces the modification to a minimum. - This class is meant to add this plugin and - to overwrite the criterion function in order to pass all necessary variables - to the VAE loss function. + We only need to overwrite the criterion function in order to pass all + necessary variables to the VAE loss function. + Furthermore we remove all metrics from the evaluator. """ def __init__( @@ -457,7 +457,8 @@ def __init__( ) def criterion(self): - """Loss function.""" + """Adapt input to criterion as needed to compute reconstruction loss + and KL divergence. See default criterion VAELoss.""" return self._criterion(self.mb_x, self.mb_output) From 5bec8a99c86cb87829197fb2643db101d73a3e86 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 6 Mar 2022 19:57:26 +0100 Subject: [PATCH 21/50] Doc. --- avalanche/training/plugins/generative_replay.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index 6a56f6f55..a83513094 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -92,7 +92,7 @@ def before_training_exp(self, strategy: "SupervisedTemplate", strategy.experience.classes_in_this_experience) if self.untrained_solver: - # The solver needs to be trained to label generated data + # The solver needs to be trained before labelling generated data and # the generator needs to be trained before we can sample. self.untrained_solver = False return From ff87dbb4952101f6cb9630eabaf399bad445a05e Mon Sep 17 00:00:00 2001 From: florian Date: Tue, 8 Mar 2022 17:38:40 +0100 Subject: [PATCH 22/50] Change CI workflow to run unittest for generative_replay branch. 
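For reference, the criterion contract documented above boils down to calling
VAE_loss(X, forward_output), where forward_output is the (X_hat, mean, logvar) triple
returned by the VAE's forward pass. A minimal, self-contained sketch with placeholder
tensors (the shapes and values are made up, purely to show what the loss consumes):

    import torch
    from avalanche.models.generator import VAE_loss

    x      = torch.rand(16, 1, 28, 28)   # original input batch
    x_hat  = torch.rand(16, 1, 28, 28)   # stand-in reconstruction
    mean   = torch.zeros(16, 2)          # latent mean (nhid=2 here)
    logvar = torch.zeros(16, 2)          # latent log-variance

    # Summed MSE reconstruction term plus the KL divergence term.
    loss = VAE_loss(x, (x_hat, mean, logvar))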
--- .github/workflows/unit-test.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index bbf3711f4..1d5d35f36 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -15,17 +15,19 @@ on: push: branches: - master + - generative_replay paths: - - '**.py' - - '.github/workflows/unit-test.yml' - - 'environment.yml' + - "**.py" + - ".github/workflows/unit-test.yml" + - "environment.yml" pull_request: branches: - master + - generative_replay paths: - - '**.py' - - '.github/workflows/unit-test.yml' - - 'environment.yml' + - "**.py" + - ".github/workflows/unit-test.yml" + - "environment.yml" jobs: unit-test: @@ -36,7 +38,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.6", "3.7", "3.8", "3.9"] + python-version: ["3.6", "3.7", "3.8", "3.9"] defaults: run: shell: bash -l {0} From 171bd399ede874493a9ee957eb3361c37dce916e Mon Sep 17 00:00:00 2001 From: florian Date: Tue, 8 Mar 2022 18:05:56 +0100 Subject: [PATCH 23/50] Create splitMNIST example. --- .../training/supervised/strategy_wrappers.py | 2 + examples/generative_replay_splitMNIST.py | 116 ++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 examples/generative_replay_splitMNIST.py diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 0ebb642c7..8ac148b66 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -1107,6 +1107,8 @@ def __init__( "PNNStrategy", "CWRStar", "Replay", + "GenerativeReplay", + "VAETraining", "GDumb", "LwF", "AGEM", diff --git a/examples/generative_replay_splitMNIST.py b/examples/generative_replay_splitMNIST.py new file mode 100644 index 000000000..37d6ba9cb --- /dev/null +++ b/examples/generative_replay_splitMNIST.py @@ -0,0 +1,116 @@ +################################################################################ +# Copyright (c) 2021 ContinualAI. # +# Copyrights licensed under the MIT License. # +# See the accompanying LICENSE file for terms. # +# # +# Date: 12-10-2020 # +# Author(s): Vincenzo Lomonaco # +# E-mail: contact@continualai.org # +# Website: avalanche.continualai.org # +################################################################################ + +""" +This is a simple example on how to use the Replay strategy. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from os.path import expanduser + +import argparse +import torch +from torch.nn import CrossEntropyLoss +from torchvision import transforms +from torchvision.transforms import ToTensor, RandomCrop +import torch.optim.lr_scheduler +from avalanche.benchmarks import SplitMNIST +from avalanche.models import SimpleMLP +from avalanche.training.supervised import GenerativeReplay, VAETraining +from avalanche.training.plugins import ReplayPlugin +from avalanche.evaluation.metrics import ( + forgetting_metrics, + accuracy_metrics, + loss_metrics, +) +from avalanche.logging import InteractiveLogger +from avalanche.training.plugins import EvaluationPlugin + + +def main(args): + # --- CONFIG + device = torch.device( + f"cuda:{args.cuda}" + if torch.cuda.is_available() and args.cuda >= 0 + else "cpu" + ) + n_batches = 5 + # --------- + + # --- TRANSFORMATIONS + train_transform = transforms.Compose( + [ + RandomCrop(28, padding=4), + ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)), + ] + ) + test_transform = transforms.Compose( + [ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ) + # --------- + + # --- SCENARIO CREATION + scenario = SplitMNIST(n_experiences=10, seed=1234) + # --------- + + # MODEL CREATION + model = SimpleMLP(num_classes=scenario.n_classes) + + # choose some metrics and evaluation method + interactive_logger = InteractiveLogger() + + eval_plugin = EvaluationPlugin( + accuracy_metrics( + minibatch=True, epoch=True, experience=True, stream=True + ), + loss_metrics(minibatch=True, epoch=True, experience=True, stream=True), + forgetting_metrics(experience=True), + loggers=[interactive_logger], + ) + + # CREATE THE STRATEGY INSTANCE (GenerativeReplay) + cl_strategy = GenerativeReplay( + model, + torch.optim.Adam(model.parameters(), lr=0.001), + CrossEntropyLoss(), + train_mb_size=100, + train_epochs=4, + eval_mb_size=100, + device=device, + evaluator=eval_plugin, + ) + + # TRAINING LOOP + print("Starting experiment...") + results = [] + for experience in scenario.train_stream: + print("Start of experience ", experience.current_experience) + cl_strategy.train(experience) + print("Training completed") + + print("Computing accuracy on the whole test set") + results.append(cl_strategy.eval(scenario.test_stream)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--cuda", + type=int, + default=0, + help="Select zero-indexed cuda device. -1 to use CPU.", + ) + args = parser.parse_args() + main(args) From e2eddf31b96e740e6dd735c093c2dd1a80233992 Mon Sep 17 00:00:00 2001 From: florian Date: Tue, 8 Mar 2022 18:54:42 +0100 Subject: [PATCH 24/50] Create VAE on MNIST example for GenerativeReplayPlugin. --- examples/generative_replay_MNIST_generator.py | 105 ++++++++++++++++++ examples/generative_replay_splitMNIST.py | 4 +- 2 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 examples/generative_replay_MNIST_generator.py diff --git a/examples/generative_replay_MNIST_generator.py b/examples/generative_replay_MNIST_generator.py new file mode 100644 index 000000000..015bb1bc5 --- /dev/null +++ b/examples/generative_replay_MNIST_generator.py @@ -0,0 +1,105 @@ +################################################################################ +# Copyright (c) 2021 ContinualAI. # +# Copyrights licensed under the MIT License. # +# See the accompanying LICENSE file for terms. 
# +# # +# Date: 12-10-2020 # +# Author(s): Vincenzo Lomonaco # +# E-mail: contact@continualai.org # +# Website: avalanche.continualai.org # +################################################################################ + +""" +This is a simple example on how to use the Replay strategy. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import torch +from torch.nn import CrossEntropyLoss +from torchvision import transforms +from torchvision.transforms import ToTensor, RandomCrop +import torch.optim.lr_scheduler +import matplotlib.pyplot as plt +import numpy as np +from avalanche.benchmarks import SplitMNIST +from avalanche.models import VAE +from avalanche.training.supervised import VAETraining +from avalanche.training.plugins import GenerativeReplayPlugin +from avalanche.logging import InteractiveLogger + + +def main(args): + # --- CONFIG + device = torch.device( + f"cuda:{args.cuda}" + if torch.cuda.is_available() and args.cuda >= 0 + else "cpu" + ) + n_batches = 5 + # --------- + + # --- TRANSFORMATIONS + train_transform = transforms.Compose( + [ + RandomCrop(28, padding=4), + ToTensor(), + transforms.Normalize((0.1307,), (0.3081,)), + ] + ) + test_transform = transforms.Compose( + [ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ) + # --------- + + # --- SCENARIO CREATION + scenario = SplitMNIST(n_experiences=10, seed=1234) + # --------- + + # MODEL CREATION + model = VAE((1, 28, 28), nhid=2) + + # choose some metrics and evaluation method + interactive_logger = InteractiveLogger() + + # CREATE THE STRATEGY INSTANCE (GenerativeReplay) + cl_strategy = VAETraining( + model, + torch.optim.Adam(model.parameters(), lr=0.001), + CrossEntropyLoss(), + train_mb_size=100, + train_epochs=4, + eval_mb_size=100, + device=device, + plugins=[GenerativeReplayPlugin()] + ) + + # TRAINING LOOP + print("Starting experiment...") + for experience in scenario.train_stream: + print("Start of experience ", experience.current_experience) + cl_strategy.train(experience) + print("Training completed") + + samples = model.generate(10) + samples = samples.cpu().numpy() + + f, axarr = plt.subplots(1, 10) + for j in range(10): + axarr[j].imshow(samples[j, 0], cmap="gray") + np.vectorize(lambda ax: ax.axis('off'))(axarr) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--cuda", + type=int, + default=0, + help="Select zero-indexed cuda device. 
-1 to use CPU.", + ) + args = parser.parse_args() + main(args) diff --git a/examples/generative_replay_splitMNIST.py b/examples/generative_replay_splitMNIST.py index 37d6ba9cb..1467ae3e4 100644 --- a/examples/generative_replay_splitMNIST.py +++ b/examples/generative_replay_splitMNIST.py @@ -17,7 +17,6 @@ from __future__ import division from __future__ import print_function -from os.path import expanduser import argparse import torch @@ -27,8 +26,7 @@ import torch.optim.lr_scheduler from avalanche.benchmarks import SplitMNIST from avalanche.models import SimpleMLP -from avalanche.training.supervised import GenerativeReplay, VAETraining -from avalanche.training.plugins import ReplayPlugin +from avalanche.training.supervised import GenerativeReplay from avalanche.evaluation.metrics import ( forgetting_metrics, accuracy_metrics, From b383954eb609004409babc2fcf60b587426777f9 Mon Sep 17 00:00:00 2001 From: florian Date: Tue, 8 Mar 2022 19:03:32 +0100 Subject: [PATCH 25/50] Change VAE loss function --- examples/generative_replay_MNIST_generator.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/generative_replay_MNIST_generator.py b/examples/generative_replay_MNIST_generator.py index 015bb1bc5..2dd02b6c6 100644 --- a/examples/generative_replay_MNIST_generator.py +++ b/examples/generative_replay_MNIST_generator.py @@ -69,10 +69,8 @@ def main(args): cl_strategy = VAETraining( model, torch.optim.Adam(model.parameters(), lr=0.001), - CrossEntropyLoss(), train_mb_size=100, train_epochs=4, - eval_mb_size=100, device=device, plugins=[GenerativeReplayPlugin()] ) From 387e711087a5657e81ef6dfcc983280fbc9255cb Mon Sep 17 00:00:00 2001 From: florian Date: Tue, 8 Mar 2022 19:07:52 +0100 Subject: [PATCH 26/50] detach() samples. --- examples/generative_replay_MNIST_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/generative_replay_MNIST_generator.py b/examples/generative_replay_MNIST_generator.py index 2dd02b6c6..24571b2a7 100644 --- a/examples/generative_replay_MNIST_generator.py +++ b/examples/generative_replay_MNIST_generator.py @@ -83,7 +83,7 @@ def main(args): print("Training completed") samples = model.generate(10) - samples = samples.cpu().numpy() + samples = samples.detach().cpu().numpy() f, axarr = plt.subplots(1, 10) for j in range(10): From e73d3bed10486151e3b6fc30102a28fd7faee09a Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 9 Mar 2022 10:44:06 +0100 Subject: [PATCH 27/50] save plot of generated samples and try to open window. 
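The diff below plots one row of generated digits per experience and writes the figure
to disk. For reference, sampling and plotting from the VAE in isolation looks roughly
like the following sketch; an untrained model is built here only so the snippet runs on
its own, and the output filename is made up:

    import matplotlib.pyplot as plt
    import numpy as np
    import torch
    from avalanche.models import VAE

    # generate() draws its latent noise on the module-level default device of
    # generator.py, so put the model on that same device.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = VAE((1, 28, 28), nhid=2).to(device)

    samples = model.generate(10).detach().cpu().numpy()   # shape (10, 1, 28, 28)

    f, axarr = plt.subplots(1, 10)
    for j in range(10):
        axarr[j].imshow(samples[j, 0], cmap="gray")
    np.vectorize(lambda ax: ax.axis("off"))(axarr)
    plt.savefig("VAE_samples.png")   # plt.show() may fail on a headless machine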
--- examples/generative_replay_MNIST_generator.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/examples/generative_replay_MNIST_generator.py b/examples/generative_replay_MNIST_generator.py index 24571b2a7..cbfc4841f 100644 --- a/examples/generative_replay_MNIST_generator.py +++ b/examples/generative_replay_MNIST_generator.py @@ -77,7 +77,9 @@ def main(args): # TRAINING LOOP print("Starting experiment...") + f, axarr = plt.subplots(len(scenario.train_stream), 10) for experience in scenario.train_stream: + k = 0 print("Start of experience ", experience.current_experience) cl_strategy.train(experience) print("Training completed") @@ -85,11 +87,14 @@ def main(args): samples = model.generate(10) samples = samples.detach().cpu().numpy() - f, axarr = plt.subplots(1, 10) for j in range(10): - axarr[j].imshow(samples[j, 0], cmap="gray") + axarr[k, j].imshow(samples[k, j, 0], cmap="gray") + axarr[k, 4].set_title("Generated images for experience " + str(k)) np.vectorize(lambda ax: ax.axis('off'))(axarr) + plt.savefig("VAE_output_per_exp") + plt.show() + if __name__ == "__main__": parser = argparse.ArgumentParser() From 7265e2373be6a3a20cb3fe709b0c4ca250de51ba Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 9 Mar 2022 10:45:43 +0100 Subject: [PATCH 28/50] Lower number of exp for testing --- examples/generative_replay_MNIST_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/generative_replay_MNIST_generator.py b/examples/generative_replay_MNIST_generator.py index cbfc4841f..c3a743d6f 100644 --- a/examples/generative_replay_MNIST_generator.py +++ b/examples/generative_replay_MNIST_generator.py @@ -56,7 +56,7 @@ def main(args): # --------- # --- SCENARIO CREATION - scenario = SplitMNIST(n_experiences=10, seed=1234) + scenario = SplitMNIST(n_experiences=3, seed=1234) # --------- # MODEL CREATION @@ -77,7 +77,7 @@ def main(args): # TRAINING LOOP print("Starting experiment...") - f, axarr = plt.subplots(len(scenario.train_stream), 10) + f, axarr = plt.subplots(scenario.n_experiences, 10) for experience in scenario.train_stream: k = 0 print("Start of experience ", experience.current_experience) From fe892f0d5e6ca405cbd331aa9c88589aacc29e53 Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 9 Mar 2022 10:52:52 +0100 Subject: [PATCH 29/50] train 3 exp. 
--- examples/generative_replay_MNIST_generator.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/generative_replay_MNIST_generator.py b/examples/generative_replay_MNIST_generator.py index c3a743d6f..3b80021e9 100644 --- a/examples/generative_replay_MNIST_generator.py +++ b/examples/generative_replay_MNIST_generator.py @@ -56,7 +56,7 @@ def main(args): # --------- # --- SCENARIO CREATION - scenario = SplitMNIST(n_experiences=3, seed=1234) + scenario = SplitMNIST(n_experiences=10, seed=1234) # --------- # MODEL CREATION @@ -78,10 +78,11 @@ def main(args): # TRAINING LOOP print("Starting experiment...") f, axarr = plt.subplots(scenario.n_experiences, 10) - for experience in scenario.train_stream: - k = 0 - print("Start of experience ", experience.current_experience) - cl_strategy.train(experience) + for k in range(3): # scenario.train_stream: + # k = 0 + print("Start of experience ", + scenario.train_stream[k].current_experience) + cl_strategy.train(scenario.train_stream[k]) print("Training completed") samples = model.generate(10) From bb1179d6c320b2ba48317ec80529ea26c56ef54d Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 9 Mar 2022 10:56:45 +0100 Subject: [PATCH 30/50] Fix bug --- examples/generative_replay_MNIST_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/generative_replay_MNIST_generator.py b/examples/generative_replay_MNIST_generator.py index 3b80021e9..e0e16fe92 100644 --- a/examples/generative_replay_MNIST_generator.py +++ b/examples/generative_replay_MNIST_generator.py @@ -89,7 +89,7 @@ def main(args): samples = samples.detach().cpu().numpy() for j in range(10): - axarr[k, j].imshow(samples[k, j, 0], cmap="gray") + axarr[k, j].imshow(samples[j, 0], cmap="gray") axarr[k, 4].set_title("Generated images for experience " + str(k)) np.vectorize(lambda ax: ax.axis('off'))(axarr) From 450cc8cdd328a61f2e1e619390d68b3780fa835a Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 9 Mar 2022 11:45:09 +0100 Subject: [PATCH 31/50] Save all plots in a single file. --- examples/generative_replay_MNIST_generator.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/generative_replay_MNIST_generator.py b/examples/generative_replay_MNIST_generator.py index e0e16fe92..350ca7b17 100644 --- a/examples/generative_replay_MNIST_generator.py +++ b/examples/generative_replay_MNIST_generator.py @@ -78,11 +78,11 @@ def main(args): # TRAINING LOOP print("Starting experiment...") f, axarr = plt.subplots(scenario.n_experiences, 10) - for k in range(3): # scenario.train_stream: - # k = 0 + k = 0 + for experience in scenario.train_stream: print("Start of experience ", - scenario.train_stream[k].current_experience) - cl_strategy.train(scenario.train_stream[k]) + experience.current_experience) + cl_strategy.train(experience) print("Training completed") samples = model.generate(10) @@ -92,9 +92,11 @@ def main(args): axarr[k, j].imshow(samples[j, 0], cmap="gray") axarr[k, 4].set_title("Generated images for experience " + str(k)) np.vectorize(lambda ax: ax.axis('off'))(axarr) + k += 1 - plt.savefig("VAE_output_per_exp") - plt.show() + f.subplots_adjust(hspace=1.2) + plt.savefig("VAE_output_per_exp") + plt.show() if __name__ == "__main__": From 279d56e226dc347328792b22b1c1c7bfe8480f3e Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 9 Mar 2022 12:13:53 +0100 Subject: [PATCH 32/50] Change TrainGeneratorAfterExpPlugin name; use current_experience to determine #samples. 
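The sizing rule introduced here scales the replay portion with the number of
experiences already seen. A small worked example of the arithmetic (the numbers are
made up, purely illustrative):

    # len(strategy.adapted_dataset) = 6000 samples in the new experience,
    # strategy.experience.current_experience = 3, i.e. three experiences seen before.
    n_current = 6000
    experience_id = 3
    n_replay = n_current * experience_id   # 18000 generated samples
    # Roughly one "dataset's worth" of replay per previously seen experience,
    # so old and new classes stay balanced in the resulting ReplayDataLoader.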
--- avalanche/training/plugins/__init__.py | 3 +- .../training/plugins/generative_replay.py | 33 ++++--------------- .../training/supervised/strategy_wrappers.py | 8 ++--- 3 files changed, 13 insertions(+), 31 deletions(-) diff --git a/avalanche/training/plugins/__init__.py b/avalanche/training/plugins/__init__.py index 02d2e3f92..aa5a95401 100644 --- a/avalanche/training/plugins/__init__.py +++ b/avalanche/training/plugins/__init__.py @@ -13,4 +13,5 @@ from .lfl import LFLPlugin from .early_stopping import EarlyStoppingPlugin from .lr_scheduling import LRSchedulerPlugin -from .generative_replay import GenerativeReplayPlugin, trainGeneratorPlugin +from .generative_replay import GenerativeReplayPlugin, \ + TrainGeneratorAfterExpPlugin diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index a83513094..b99fbd3a0 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -69,7 +69,6 @@ def __init__(self, generator=None, mem_size: int = 200, self.generator = None self.untrained_solver = untrained_solver self.model_is_generator = False - self.classes_until_now = [] def before_training(self, strategy, *args, **kwargs): """Checks whether we are using a user defined external generator @@ -88,8 +87,6 @@ def before_training_exp(self, strategy: "SupervisedTemplate", ReplayDataloader to build batches containing examples from both, data sampled from the generator and the training dataset. """ - self.classes_until_now.append( - strategy.experience.classes_in_this_experience) if self.untrained_solver: # The solver needs to be trained before labelling generated data and @@ -100,7 +97,7 @@ def before_training_exp(self, strategy: "SupervisedTemplate", # Sample data from generator memory = self.generator.generate( len(strategy.adapted_dataset) * - (len(self.classes_until_now)-1)).to(strategy.device) + (strategy.experience.current_experience)).to(strategy.device) # Label the generated data using the current solver model, # in case there is a solver if not self.model_is_generator: @@ -128,34 +125,18 @@ def before_training_exp(self, strategy: "SupervisedTemplate", strategy.adapted_dataset, memory, batch_size=batch_size, - batch_size_mem=batch_size_mem*(len(self.classes_until_now)-1), + batch_size_mem=batch_size_mem * + (strategy.experience.current_experience), task_balanced_dataloader=self.task_balanced_dataloader, num_workers=num_workers, shuffle=shuffle) -class RtFPlugin(SupervisedPlugin): +class TrainGeneratorAfterExpPlugin(SupervisedPlugin): """ - RtFPlugin which facilitates the conventional training of the models.VAE. - - The VAE's forward call computes the representations in the latent space, - 'after_forward' computes the remaining steps of the classic VAE forward. - """ - - def after_forward( - self, strategy, *args, **kwargs - ): - """ - Compute the reconstruction of the input and posterior distribution. - """ - print("Replay-through-Feedback to be implemented soon.") - - -class trainGeneratorPlugin(SupervisedPlugin): - """ - trainGeneratorPlugin makes sure that after each experience of training - the solver of a scholar model, we also train the generator on the data - of the current experience. + TrainGeneratorAfterExpPlugin makes sure that after each experience of + training the solver of a scholar model, we also train the generator on the + data of the current experience. 
""" def after_training_exp(self, strategy: "SupervisedTemplate", **kwargs): diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 8ac148b66..3ca0f64c5 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -20,7 +20,7 @@ CWRStarPlugin, ReplayPlugin, GenerativeReplayPlugin, - trainGeneratorPlugin, + TrainGeneratorAfterExpPlugin, GDumbPlugin, LwFPlugin, AGEMPlugin, @@ -289,8 +289,8 @@ class GenerativeReplay(SupervisedTemplate): The model parameter should contain the solver. As an optional input a generator can be wrapped in a trainable strategy - and passed through generator_strategy. - By default a simple VAE will be used as generator. + and passed to the generator_strategy parameter. By default a simple VAE will + be used as generator. For the case where the Generator is the model itself that is to be trained, please simply add the GenerativeReplayPlugin() when instantiating @@ -367,7 +367,7 @@ def __init__( rp = GenerativeReplayPlugin(generator=self.generator_strategy) - tgp = trainGeneratorPlugin() + tgp = TrainGeneratorAfterExpPlugin() if plugins is None: plugins = [tgp, rp] From 3d874ad8da7971d9c0a623067a9cc187f57149c2 Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 9 Mar 2022 12:43:37 +0100 Subject: [PATCH 33/50] [General] VAE model exports; try to remove device --- avalanche/models/__init__.py | 2 +- avalanche/models/generator.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/avalanche/models/__init__.py b/avalanche/models/__init__.py index 963184e10..cf93390af 100644 --- a/avalanche/models/__init__.py +++ b/avalanche/models/__init__.py @@ -19,4 +19,4 @@ from .base_model import BaseModel from .helper_method import as_multitask from .pnn import PNN -from .generator import VAE, VAE_loss +from .generator import * diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index 476c9bee5..9aba4a1ae 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -42,7 +42,7 @@ def generate(self, batch_size=None): ########################### # VARIATIONAL AUTOENCODER # ########################### -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') class Flatten(nn.Module): @@ -178,8 +178,8 @@ def generate(self, batch_size=None): Output is either a single sample if batch_size=None, else it is a batch of samples of size "batch_size". 
""" - z = torch.randn((batch_size, self.dim)).to( - device) if batch_size else torch.randn((1, self.dim)).to(device) + z = torch.randn((batch_size, self.dim) + ) if batch_size else torch.randn((1, self.dim)) res = self.decoder(z) if not batch_size: res = res.squeeze(0) @@ -189,7 +189,7 @@ def sampling(self, mean, logvar): """ VAE 'reparametrization trick' """ - eps = torch.randn(mean.shape).to(device) + eps = torch.randn(mean.shape) sigma = 0.5 * torch.exp(logvar) return mean + eps * sigma @@ -224,3 +224,6 @@ def VAE_loss(X, forward_output): reconstruction_loss = MSE_loss(X_hat, X) KL_divergence = 0.5 * torch.sum(-1 - logvar + torch.exp(logvar) + mean**2) return reconstruction_loss + KL_divergence + + +__all__ = ["VAE, VAE_loss"] From 7aa35658c81a9cb4b1b28bf22667676a5346aaaa Mon Sep 17 00:00:00 2001 From: travela Date: Wed, 9 Mar 2022 12:55:03 +0100 Subject: [PATCH 34/50] Update generator.py --- avalanche/models/generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index 9aba4a1ae..38385a259 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -226,4 +226,4 @@ def VAE_loss(X, forward_output): return reconstruction_loss + KL_divergence -__all__ = ["VAE, VAE_loss"] +__all__ = ["VAE", "VAE_loss"] From 4331d82112ab3ab27bed77b8d54749aa41813cb9 Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 9 Mar 2022 13:03:29 +0100 Subject: [PATCH 35/50] Pass device to VAE. --- avalanche/models/generator.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index 9aba4a1ae..fe8639599 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -151,7 +151,7 @@ class VAE(Generator, nn.Module): More details can be found in: https://arxiv.org/abs/1809.10635 ''' - def __init__(self, shape, nhid=16, n_classes=10): + def __init__(self, shape, nhid=16, n_classes=10, device="cpu"): """ :param shape: Shape of each input sample :param nhid: Dimension of latent space of Encoder. @@ -160,6 +160,7 @@ def __init__(self, shape, nhid=16, n_classes=10): """ super(VAE, self).__init__() self.dim = nhid + self.device = device self.encoder = Encoder(shape, latent_dim=128) self.calc_mean = MLP([128, nhid], last_activation=False) self.calc_logvar = MLP([128, nhid], last_activation=False) @@ -178,8 +179,9 @@ def generate(self, batch_size=None): Output is either a single sample if batch_size=None, else it is a batch of samples of size "batch_size". 
""" - z = torch.randn((batch_size, self.dim) - ) if batch_size else torch.randn((1, self.dim)) + z = torch.randn((batch_size, self.dim)).to( + self.device) if batch_size else torch.randn((1, self.dim)).to( + self.device) res = self.decoder(z) if not batch_size: res = res.squeeze(0) @@ -189,7 +191,7 @@ def sampling(self, mean, logvar): """ VAE 'reparametrization trick' """ - eps = torch.randn(mean.shape) + eps = torch.randn(mean.shape).to(self.device) sigma = 0.5 * torch.exp(logvar) return mean + eps * sigma From ca418eae6bc6b2f2685b647598c7e4edae6ffa98 Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 9 Mar 2022 13:04:39 +0100 Subject: [PATCH 36/50] Pass device in strategy to VAE --- avalanche/training/supervised/strategy_wrappers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 3ca0f64c5..5de4788d8 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -349,7 +349,7 @@ def __init__( else: # By default we use a fully-connected VAE as the generator. # model: - generator = VAE((1, 28, 28), nhid=2) + generator = VAE((1, 28, 28), nhid=2, device=device) # optimzer: lr = 0.01 from torch.optim import Adam From 6d817995e9397b64ee1b9184059b7b3618c68c89 Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 9 Mar 2022 15:53:47 +0100 Subject: [PATCH 37/50] [General] Docstring --- avalanche/training/plugins/generative_replay.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index b99fbd3a0..e7c9f2f3c 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -33,6 +33,12 @@ class GenerativeReplayPlugin(SupervisedPlugin): and one part of generative data for each class that has been encountered so far. + In this version of the plugin the number of replay samples is + increased with each new experience. Another way to implempent + the algorithm is by weighting the loss function and give more + importance to the replayed data as the number of experiences + increases. This will be implemented as an option for the user soon. + :param batch_size: the size of the data batch. If set to `None`, it will be set equal to the strategy's batch size. :param batch_size_mem: the size of the memory batch. If From 3d66711967edd16717de23fa2dec528c388ba044 Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 9 Mar 2022 16:55:45 +0100 Subject: [PATCH 38/50] Reverse unit-test.yml changes. 
--- .github/workflows/unit-test.yml | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 1d5d35f36..bbf3711f4 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -15,19 +15,17 @@ on: push: branches: - master - - generative_replay paths: - - "**.py" - - ".github/workflows/unit-test.yml" - - "environment.yml" + - '**.py' + - '.github/workflows/unit-test.yml' + - 'environment.yml' pull_request: branches: - master - - generative_replay paths: - - "**.py" - - ".github/workflows/unit-test.yml" - - "environment.yml" + - '**.py' + - '.github/workflows/unit-test.yml' + - 'environment.yml' jobs: unit-test: @@ -38,7 +36,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.6", "3.7", "3.8", "3.9"] + python-version: [ "3.6", "3.7", "3.8", "3.9"] defaults: run: shell: bash -l {0} From e910cf1b784b4cce17d09c45a999f839af8e2a96 Mon Sep 17 00:00:00 2001 From: travela Date: Wed, 9 Mar 2022 16:59:00 +0100 Subject: [PATCH 39/50] Remove commented line --- avalanche/models/generator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index f8a15a6d2..49c57a06e 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -42,7 +42,6 @@ def generate(self, batch_size=None): ########################### # VARIATIONAL AUTOENCODER # ########################### -# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') class Flatten(nn.Module): From bba78b919f2b6fff66c0b69147c5e914fc9ab9bc Mon Sep 17 00:00:00 2001 From: travela Date: Wed, 23 Mar 2022 19:36:36 +0100 Subject: [PATCH 40/50] Resolve Requested Changes (#3) * Extend current mbatch with replay data dynamically before each iteration. * Update boolean after first experience. * Fix mbatch[-1] extension * Put replay_output to device. * Resolve change requests: class names; VAELoss doc * Documentation. --- avalanche/models/generator.py | 20 ++--- .../training/plugins/generative_replay.py | 78 ++++++++++--------- 2 files changed, 52 insertions(+), 46 deletions(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index 49c57a06e..08a269f2a 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -86,7 +86,7 @@ def forward(self, x): return self.mlp(x) -class Encoder(nn.Module): +class VAEEncoder(nn.Module): ''' Encoder part of the VAE, computer the latent represenations of the input. @@ -95,7 +95,7 @@ class Encoder(nn.Module): ''' def __init__(self, shape, latent_dim=128): - super(Encoder, self).__init__() + super(VAEEncoder, self).__init__() flattened_size = torch.Size(shape).numel() self.encode = nn.Sequential( Flatten(), @@ -110,7 +110,7 @@ def forward(self, x, y=None): return x -class Decoder(nn.Module): +class VAEDecoder(nn.Module): ''' Decoder part of the VAE. Reverses Encoder. 
@@ -119,7 +119,7 @@ class Decoder(nn.Module): ''' def __init__(self, shape, nhid=16): - super(Decoder, self).__init__() + super(VAEDecoder, self).__init__() flattened_size = torch.Size(shape).numel() self.shape = shape self.decode = nn.Sequential( @@ -160,11 +160,11 @@ def __init__(self, shape, nhid=16, n_classes=10, device="cpu"): super(VAE, self).__init__() self.dim = nhid self.device = device - self.encoder = Encoder(shape, latent_dim=128) + self.encoder = VAEEncoder(shape, latent_dim=128) self.calc_mean = MLP([128, nhid], last_activation=False) self.calc_logvar = MLP([128, nhid], last_activation=False) self.classification = MLP([128, n_classes], last_activation=False) - self.decoder = Decoder(shape, nhid) + self.decoder = VAEDecoder(shape, nhid) def get_features(self, x): """ @@ -217,9 +217,11 @@ def VAE_loss(X, forward_output): This is the criterion for VAE training loop. :param X: Original input batch. - :param X_hat: Reconstructed input after subsequent Encoder and Decoder. - :param mean: mean of the VAE output distribution. - :param logvar: logvar of the VAE output distribution. + :param forward_output: Return value of a VAE.forward() call. + Triplet consisting of (X_hat, mean. logvar), ie. + (Reconstructed input after subsequent Encoder and Decoder, + mean of the VAE output distribution, + logvar of the VAE output distribution) ''' X_hat, mean, logvar = forward_output reconstruction_loss = MSE_loss(X_hat, X) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index e7c9f2f3c..5365feed7 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -14,6 +14,7 @@ """ +from copy import deepcopy from avalanche.benchmarks.utils.data_loader import ReplayDataLoader from avalanche.benchmarks.utils import AvalancheDataset from avalanche.core import SupervisedPlugin @@ -76,7 +77,7 @@ def __init__(self, generator=None, mem_size: int = 200, self.untrained_solver = untrained_solver self.model_is_generator = False - def before_training(self, strategy, *args, **kwargs): + def before_training(self, strategy: "SupervisedTemplate", *args, **kwargs): """Checks whether we are using a user defined external generator or we use the strategy's model as the generator. If the generator is None after initialization @@ -90,52 +91,55 @@ def before_training_exp(self, strategy: "SupervisedTemplate", num_workers: int = 0, shuffle: bool = True, **kwargs): """ - ReplayDataloader to build batches containing examples from both, - data sampled from the generator and the training dataset. + Make deep copies of generator and solver before training new experience. """ - if self.untrained_solver: # The solver needs to be trained before labelling generated data and # the generator needs to be trained before we can sample. - self.untrained_solver = False return + self.old_generator = deepcopy(self.generator) + self.old_generator.eval() + if not self.model_is_generator: + self.old_model = deepcopy(strategy.model) + self.old_model.eval() + + def after_training_exp(self, strategy: "SupervisedTemplate", + num_workers: int = 0, shuffle: bool = True, + **kwargs): + """ + Set untrained_solver boolean to False after (the first) experience, + in order to start training with replay data from the second experience. 
+ """ + self.untrained_solver = False - # Sample data from generator - memory = self.generator.generate( - len(strategy.adapted_dataset) * - (strategy.experience.current_experience)).to(strategy.device) - # Label the generated data using the current solver model, - # in case there is a solver + def before_training_iteration(self, strategy: "SupervisedTemplate", + **kwargs): + """ + Generating and appending replay data to current minibatch before + each training iteration. + """ + if self.untrained_solver: + # The solver needs to be trained before labelling generated data and + # the generator needs to be trained before we can sample. + return + # extend X with replay data + replay = self.old_generator.generate( + len(strategy.mbatch[0]) * (strategy.experience.current_experience) + ).to(strategy.device) + strategy.mbatch[0] = torch.cat([strategy.mbatch[0], replay], dim=0) + # extend y with predicted labels (or mock labels if model==generator) if not self.model_is_generator: - strategy.model.eval() with torch.no_grad(): - memory_output = strategy.model(memory).argmax(dim=-1) - strategy.model.train() + replay_output = self.old_model(replay).argmax(dim=-1) else: # Mock labels: - memory_output = torch.zeros(memory.shape[0]) - # Create an AvalancheDataset from memory data and labels - memory = AvalancheDataset(torch.utils.data.TensorDataset( - memory.detach().cpu(), memory_output.detach().cpu())) - - batch_size = self.batch_size - if batch_size is None: - batch_size = strategy.train_mb_size - - batch_size_mem = self.batch_size_mem - if batch_size_mem is None: - batch_size_mem = strategy.train_mb_size - # Update strategy's dataloader by interleaving - # current experience's data with generated data. - strategy.dataloader = ReplayDataLoader( - strategy.adapted_dataset, - memory, - batch_size=batch_size, - batch_size_mem=batch_size_mem * - (strategy.experience.current_experience), - task_balanced_dataloader=self.task_balanced_dataloader, - num_workers=num_workers, - shuffle=shuffle) + replay_output = torch.zeros(replay.shape[0]) + strategy.mbatch[1] = torch.cat( + [strategy.mbatch[1], replay_output.to(strategy.device)], dim=0) + # extend task id batch (we implicitley assume a task-free case) + strategy.mbatch[-1] = torch.cat([strategy.mbatch[-1], torch.ones( + replay.shape[0]).to(strategy.device) * strategy.mbatch[-1][0]], + dim=0) class TrainGeneratorAfterExpPlugin(SupervisedPlugin): From c9f5a230b8f2aaf72531eb710564b316ec4dc121 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 27 Mar 2022 18:09:17 +0200 Subject: [PATCH 41/50] Move general modules from VAE to utils. --- avalanche/models/generator.py | 45 +---------------------------------- avalanche/models/utils.py | 45 ++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index 08a269f2a..222dcbde5 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -19,9 +19,8 @@ from matplotlib import transforms import torch import torch.nn as nn -from collections import OrderedDict from torchvision import transforms - +from avalanche.models.utils import MLP, Flatten from avalanche.models.base_model import BaseModel @@ -44,48 +43,6 @@ def generate(self, batch_size=None): ########################### -class Flatten(nn.Module): - ''' - Simple nn.Module to flatten each tensor of a batch of tensors. 
- ''' - - def __init__(self): - super(Flatten, self).__init__() - - def forward(self, x): - batch_size = x.shape[0] - return x.view(batch_size, -1) - - -class MLP(nn.Module): - ''' - Simple nn.Module to create a multi-layer perceptron - with BatchNorm and ReLU activations. - - :param hidden_size: An array indicating the number of neurons in each layer. - :type hidden_size: int[] - :param last_activation: Indicates whether to add BatchNorm and ReLU - after the last layer. - :type last_activation: Boolean - ''' - - def __init__(self, hidden_size, last_activation=True): - super(MLP, self).__init__() - q = [] - for i in range(len(hidden_size)-1): - in_dim = hidden_size[i] - out_dim = hidden_size[i+1] - q.append(("Linear_%d" % i, nn.Linear(in_dim, out_dim))) - if (i < len(hidden_size)-2) or ((i == len(hidden_size) - 2) - and (last_activation)): - q.append(("BatchNorm_%d" % i, nn.BatchNorm1d(out_dim))) - q.append(("ReLU_%d" % i, nn.ReLU(inplace=True))) - self.mlp = nn.Sequential(OrderedDict(q)) - - def forward(self, x): - return self.mlp(x) - - class VAEEncoder(nn.Module): ''' Encoder part of the VAE, computer the latent represenations of the input. diff --git a/avalanche/models/utils.py b/avalanche/models/utils.py index 0f08297e3..38fdf4b33 100644 --- a/avalanche/models/utils.py +++ b/avalanche/models/utils.py @@ -1,6 +1,7 @@ from avalanche.benchmarks.utils import AvalancheDataset from avalanche.models.dynamic_modules import MultiTaskModule, DynamicModule import torch.nn as nn +from collections import OrderedDict def avalanche_forward(model, x, task_labels): @@ -59,4 +60,46 @@ def add_hooks(self, model): ) -__all__ = ["avalanche_forward", "FeatureExtractorBackbone"] +class Flatten(nn.Module): + ''' + Simple nn.Module to flatten each tensor of a batch of tensors. + ''' + + def __init__(self): + super(Flatten, self).__init__() + + def forward(self, x): + batch_size = x.shape[0] + return x.view(batch_size, -1) + + +class MLP(nn.Module): + ''' + Simple nn.Module to create a multi-layer perceptron + with BatchNorm and ReLU activations. + + :param hidden_size: An array indicating the number of neurons in each layer. + :type hidden_size: int[] + :param last_activation: Indicates whether to add BatchNorm and ReLU + after the last layer. + :type last_activation: Boolean + ''' + + def __init__(self, hidden_size, last_activation=True): + super(MLP, self).__init__() + q = [] + for i in range(len(hidden_size)-1): + in_dim = hidden_size[i] + out_dim = hidden_size[i+1] + q.append(("Linear_%d" % i, nn.Linear(in_dim, out_dim))) + if (i < len(hidden_size)-2) or ((i == len(hidden_size) - 2) + and (last_activation)): + q.append(("BatchNorm_%d" % i, nn.BatchNorm1d(out_dim))) + q.append(("ReLU_%d" % i, nn.ReLU(inplace=True))) + self.mlp = nn.Sequential(OrderedDict(q)) + + def forward(self, x): + return self.mlp(x) + + +__all__ = ["avalanche_forward", "FeatureExtractorBackbone", "MLP", "Flatten"] From d9c1f1c234f3021480f4ebda2b7b5ebcbd21f815 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 27 Mar 2022 18:24:27 +0200 Subject: [PATCH 42/50] Add condition as an input to the abstract generator class. 
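The added condition argument lets class-conditional generators (for instance a conditional VAE or GAN) be told which class to sample, while unconditional generators can simply ignore it. As a rough sketch of the contract only (the class name and architecture below are illustrative and do not appear in this series; a real implementation would subclass the abstract Generator, as the library's VAE model does):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class ToyConditionalGenerator(nn.Module):
        """Illustrative generator exposing the extended
        generate(batch_size, condition) signature."""

        def __init__(self, output_dim=28 * 28, nhid=16, n_classes=10):
            super().__init__()
            self.nhid = nhid
            self.n_classes = n_classes
            self.decode = nn.Sequential(
                nn.Linear(nhid + n_classes, 128),
                nn.ReLU(),
                nn.Linear(128, output_dim),
                nn.Sigmoid(),
            )

        def generate(self, batch_size=None, condition=None):
            bs = batch_size if batch_size is not None else 1
            z = torch.randn(bs, self.nhid)
            if condition is None:
                # Unconditional call: draw random class labels as the condition.
                condition = torch.randint(0, self.n_classes, (bs,))
            y = F.one_hot(condition, num_classes=self.n_classes).float()
            with torch.no_grad():
                samples = self.decode(torch.cat([z, y], dim=1))
            return samples if batch_size is not None else samples.squeeze(0)

    # For example, a batch of eight samples conditioned on class 3:
    gen = ToyConditionalGenerator()
    batch = gen.generate(batch_size=8, condition=torch.full((8,), 3))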
--- avalanche/models/generator.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index 222dcbde5..43cc92c27 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -30,11 +30,15 @@ class Generator(BaseModel): """ @abstractmethod - def generate(self, batch_size=None): + def generate(self, batch_size=None, condition=None): """ Lets the generator sample random samples. Output is either a single sample or, if provided, a batch of samples of size "batch_size" + + :param batch_size: Number of samples to generate + :param condition: Possible condition for a condotional generator + (e.g. a class label) """ From 3f096b10f6a4969aa7c3d49e545810e08dfa2121 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 27 Mar 2022 18:52:45 +0200 Subject: [PATCH 43/50] Clarify confusing generator generator_strategy naming. --- avalanche/training/plugins/generative_replay.py | 10 +++++----- avalanche/training/supervised/strategy_wrappers.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index 5365feed7..aae7ce49d 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -15,9 +15,8 @@ """ from copy import deepcopy -from avalanche.benchmarks.utils.data_loader import ReplayDataLoader -from avalanche.benchmarks.utils import AvalancheDataset from avalanche.core import SupervisedPlugin +from avalanche.training.templates.base import BaseTemplate from avalanche.training.templates.supervised import SupervisedTemplate import torch @@ -56,7 +55,8 @@ class GenerativeReplayPlugin(SupervisedPlugin): before training the first experience. Default to True. 
""" - def __init__(self, generator=None, mem_size: int = 200, + def __init__(self, generator_strategy: "BaseTemplate" = None, + mem_size: int = 200, batch_size: int = None, batch_size_mem: int = None, task_balanced_dataloader: bool = False, @@ -69,9 +69,9 @@ def __init__(self, generator=None, mem_size: int = 200, self.batch_size = batch_size self.batch_size_mem = batch_size_mem self.task_balanced_dataloader = task_balanced_dataloader - self.generator_strategy = generator + self.generator_strategy = generator_strategy if self.generator_strategy: - self.generator = generator.model + self.generator = generator_strategy.model else: self.generator = None self.untrained_solver = untrained_solver diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 5de4788d8..c62edb540 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -365,7 +365,7 @@ def __init__( eval_mb_size=32, device=device, plugins=[GenerativeReplayPlugin()]) - rp = GenerativeReplayPlugin(generator=self.generator_strategy) + rp = GenerativeReplayPlugin(generator_strategy=self.generator_strategy) tgp = TrainGeneratorAfterExpPlugin() From 03150d1b324729ce287a47414ec5b638a5070fc6 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 27 Mar 2022 19:14:47 +0200 Subject: [PATCH 44/50] Update documentation of the GenerativeReplayPlugin --- .../training/plugins/generative_replay.py | 32 ++++--------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index aae7ce49d..f8cdfaeed 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -25,13 +25,9 @@ class GenerativeReplayPlugin(SupervisedPlugin): """ Experience generative replay plugin. - Updates the Dataloader of a strategy before training an experience - by sampling a generator model and weaving the replay data into - the original training data. - - The examples in the created mini-batch contain one part of the original data - and one part of generative data for each class - that has been encountered so far. + Updates the current mbatch of a strategy before training an experience + by sampling a generator model and concatenating the replay data to the + current batch. In this version of the plugin the number of replay samples is increased with each new experience. Another way to implempent @@ -39,16 +35,10 @@ class GenerativeReplayPlugin(SupervisedPlugin): importance to the replayed data as the number of experiences increases. This will be implemented as an option for the user soon. - :param batch_size: the size of the data batch. If set to `None`, it - will be set equal to the strategy's batch size. - :param batch_size_mem: the size of the memory batch. If - `task_balanced_dataloader` is set to True, it must be greater than or - equal to the number of tasks. If its value is set to `None` - (the default value), it will be automatically set equal to the - data batch size. - :param task_balanced_dataloader: if True, buffer data loaders will be - task-balanced, otherwise it will create a single dataloader for the - buffer samples. + :param generator_strategy: In case the plugin is applied to a non-generative + model (e.g. a simple classifier), this should contain an Avalanche strategy + for a model that implements a 'generate' method + (see avalanche.models.generator.Generator). 
Defaults to None. :param untrained_solver: if True we assume this is the beginning of a continual learning task and add replay data only from the second experience onwards, otherwise we sample and add generative replay data @@ -56,19 +46,11 @@ class GenerativeReplayPlugin(SupervisedPlugin): """ def __init__(self, generator_strategy: "BaseTemplate" = None, - mem_size: int = 200, - batch_size: int = None, - batch_size_mem: int = None, - task_balanced_dataloader: bool = False, untrained_solver: bool = True): ''' Init. ''' super().__init__() - self.mem_size = mem_size - self.batch_size = batch_size - self.batch_size_mem = batch_size_mem - self.task_balanced_dataloader = task_balanced_dataloader self.generator_strategy = generator_strategy if self.generator_strategy: self.generator = generator_strategy.model From 4f5246f65d499df2e9015bbdd4d9fa2122c1f378 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 27 Mar 2022 19:38:31 +0200 Subject: [PATCH 45/50] before_training doc string --- avalanche/training/plugins/generative_replay.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index f8cdfaeed..11a3db38b 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -63,7 +63,9 @@ def before_training(self, strategy: "SupervisedTemplate", *args, **kwargs): """Checks whether we are using a user defined external generator or we use the strategy's model as the generator. If the generator is None after initialization - we assume that strategy.model is the generator.""" + we assume that strategy.model is the generator. + (e.g. this would be the case when training a VAE with + generative replay)""" if not self.generator_strategy: self.generator_strategy = strategy self.generator = strategy.model From 746e4e3b72b2d396570da2f4c8f5cd170c5058e4 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 27 Mar 2022 19:48:27 +0200 Subject: [PATCH 46/50] Renaming of VAE models. --- avalanche/models/generator.py | 18 +++++++++--------- .../training/supervised/strategy_wrappers.py | 4 ++-- examples/generative_replay_MNIST_generator.py | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/avalanche/models/generator.py b/avalanche/models/generator.py index 43cc92c27..6d8a4440c 100644 --- a/avalanche/models/generator.py +++ b/avalanche/models/generator.py @@ -47,7 +47,7 @@ def generate(self, batch_size=None, condition=None): ########################### -class VAEEncoder(nn.Module): +class VAEMLPEncoder(nn.Module): ''' Encoder part of the VAE, computer the latent represenations of the input. @@ -56,7 +56,7 @@ class VAEEncoder(nn.Module): ''' def __init__(self, shape, latent_dim=128): - super(VAEEncoder, self).__init__() + super(VAEMLPEncoder, self).__init__() flattened_size = torch.Size(shape).numel() self.encode = nn.Sequential( Flatten(), @@ -71,7 +71,7 @@ def forward(self, x, y=None): return x -class VAEDecoder(nn.Module): +class VAEMLPDecoder(nn.Module): ''' Decoder part of the VAE. Reverses Encoder. 
@@ -80,7 +80,7 @@ class VAEDecoder(nn.Module): ''' def __init__(self, shape, nhid=16): - super(VAEDecoder, self).__init__() + super(VAEMLPDecoder, self).__init__() flattened_size = torch.Size(shape).numel() self.shape = shape self.decode = nn.Sequential( @@ -98,7 +98,7 @@ def forward(self, z, y=None): .view(-1, *self.shape)) -class VAE(Generator, nn.Module): +class MlpVAE(Generator, nn.Module): ''' Variational autoencoder module: fully-connected and suited for any input shape and type. @@ -118,14 +118,14 @@ def __init__(self, shape, nhid=16, n_classes=10, device="cpu"): :param n_classes: Number of classes - defines classification head's dimension """ - super(VAE, self).__init__() + super(MlpVAE, self).__init__() self.dim = nhid self.device = device - self.encoder = VAEEncoder(shape, latent_dim=128) + self.encoder = VAEMLPEncoder(shape, latent_dim=128) self.calc_mean = MLP([128, nhid], last_activation=False) self.calc_logvar = MLP([128, nhid], last_activation=False) self.classification = MLP([128, n_classes], last_activation=False) - self.decoder = VAEDecoder(shape, nhid) + self.decoder = VAEMLPDecoder(shape, nhid) def get_features(self, x): """ @@ -190,4 +190,4 @@ def VAE_loss(X, forward_output): return reconstruction_loss + KL_divergence -__all__ = ["VAE", "VAE_loss"] +__all__ = ["MlpVAE", "VAE_loss"] diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index c62edb540..4288c348f 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -34,7 +34,7 @@ ) from avalanche.training.templates.base import BaseTemplate from avalanche.training.templates.supervised import SupervisedTemplate -from avalanche.models.generator import VAE, VAE_loss +from avalanche.models.generator import MlpVAE, VAE_loss from avalanche.logging import InteractiveLogger @@ -349,7 +349,7 @@ def __init__( else: # By default we use a fully-connected VAE as the generator. # model: - generator = VAE((1, 28, 28), nhid=2, device=device) + generator = MlpVAE((1, 28, 28), nhid=2, device=device) # optimzer: lr = 0.01 from torch.optim import Adam diff --git a/examples/generative_replay_MNIST_generator.py b/examples/generative_replay_MNIST_generator.py index 350ca7b17..a90f08de1 100644 --- a/examples/generative_replay_MNIST_generator.py +++ b/examples/generative_replay_MNIST_generator.py @@ -26,7 +26,7 @@ import matplotlib.pyplot as plt import numpy as np from avalanche.benchmarks import SplitMNIST -from avalanche.models import VAE +from avalanche.models import MlpVAE from avalanche.training.supervised import VAETraining from avalanche.training.plugins import GenerativeReplayPlugin from avalanche.logging import InteractiveLogger @@ -60,7 +60,7 @@ def main(args): # --------- # MODEL CREATION - model = VAE((1, 28, 28), nhid=2) + model = MlpVAE((1, 28, 28), nhid=2) # choose some metrics and evaluation method interactive_logger = InteractiveLogger() From d36e541a607bc92e78e28105723bb523040f9bbe Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 27 Mar 2022 19:50:50 +0200 Subject: [PATCH 47/50] Remove TrainGeneratorAfterExpPlugin plugins indexing. 
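Indexing strategy.plugins[1] silently breaks as soon as a user registers extra plugins or the ordering changes; looking the plugin up by type keeps the generator-training hook independent of its position. A small sketch of the difference (MyLoggingPlugin is a made-up stand-in for any user-supplied plugin; the avalanche imports follow the modules touched in this series):

    from avalanche.core import SupervisedPlugin
    from avalanche.training.plugins import GenerativeReplayPlugin
    from avalanche.training.plugins.generative_replay import (
        TrainGeneratorAfterExpPlugin,
    )

    class MyLoggingPlugin(SupervisedPlugin):
        """Hypothetical user plugin; its presence alone shifts positional indices."""

    plugins = [
        MyLoggingPlugin(),
        GenerativeReplayPlugin(),
        TrainGeneratorAfterExpPlugin(),
    ]

    # plugins[1] only happens to be the replay plugin for this particular list;
    # the type-based scan adopted below finds it regardless of ordering.
    replay_plugin = next(p for p in plugins if type(p) is GenerativeReplayPlugin)
    assert isinstance(replay_plugin, GenerativeReplayPlugin)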
--- avalanche/training/plugins/generative_replay.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index 11a3db38b..bb468457f 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -138,4 +138,6 @@ def after_training_exp(self, strategy: "SupervisedTemplate", **kwargs): The training method expects an Experience object with a 'dataset' parameter. """ - strategy.plugins[1].generator_strategy.train(strategy.experience) + for plugin in strategy.plugins: + if type(plugin) is GenerativeReplayPlugin: + plugin.generator_strategy.train(strategy.experience) From 99427ac66ac6edd788378798df39fcff4bc9a581 Mon Sep 17 00:00:00 2001 From: florian Date: Fri, 1 Apr 2022 14:26:37 +0200 Subject: [PATCH 48/50] Make increasing replay batch size optional. --- .../training/plugins/generative_replay.py | 27 ++++++++++++++++--- .../training/supervised/strategy_wrappers.py | 6 ++--- examples/generative_replay_MNIST_generator.py | 24 +++-------------- examples/generative_replay_splitMNIST.py | 19 ++----------- 4 files changed, 31 insertions(+), 45 deletions(-) diff --git a/avalanche/training/plugins/generative_replay.py b/avalanche/training/plugins/generative_replay.py index bb468457f..0a5d12c89 100644 --- a/avalanche/training/plugins/generative_replay.py +++ b/avalanche/training/plugins/generative_replay.py @@ -43,10 +43,18 @@ class GenerativeReplayPlugin(SupervisedPlugin): a continual learning task and add replay data only from the second experience onwards, otherwise we sample and add generative replay data before training the first experience. Default to True. + :param replay_size: The user can specify the batch size of replays that + should be added to each data batch. By default each data batch will be + matched with replays of the same number. + :param increasing_replay_size: If set to True, each experience this will + double the amount of replay data added to each data batch. The effect + will be that the older experiences will gradually increase in importance + to the final loss. """ def __init__(self, generator_strategy: "BaseTemplate" = None, - untrained_solver: bool = True): + untrained_solver: bool = True, replay_size: int = None, + increasing_replay_size: bool = False): ''' Init. ''' @@ -58,6 +66,8 @@ def __init__(self, generator_strategy: "BaseTemplate" = None, self.generator = None self.untrained_solver = untrained_solver self.model_is_generator = False + self.replay_size = replay_size + self.increasing_replay_size = increasing_replay_size def before_training(self, strategy: "SupervisedTemplate", *args, **kwargs): """Checks whether we are using a user defined external generator @@ -106,10 +116,19 @@ def before_training_iteration(self, strategy: "SupervisedTemplate", # The solver needs to be trained before labelling generated data and # the generator needs to be trained before we can sample. 
return + # determine how many replay data points to generate + if self.replay_size: + number_replays_to_generate = self.replay_size + else: + if self.increasing_replay_size: + number_replays_to_generate = len( + strategy.mbatch[0]) * ( + strategy.experience.current_experience) + else: + number_replays_to_generate = len(strategy.mbatch[0]) # extend X with replay data - replay = self.old_generator.generate( - len(strategy.mbatch[0]) * (strategy.experience.current_experience) - ).to(strategy.device) + replay = self.old_generator.generate(number_replays_to_generate + ).to(strategy.device) strategy.mbatch[0] = torch.cat([strategy.mbatch[0], replay], dim=0) # extend y with predicted labels (or mock labels if model==generator) if not self.model_is_generator: diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 4288c348f..6d4a816f1 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -360,9 +360,9 @@ def __init__( self.generator_strategy = VAETraining( model=generator, optimizer=optimizer_generator, - criterion=VAE_loss, train_mb_size=64, - train_epochs=10, - eval_mb_size=32, device=device, + criterion=VAE_loss, train_mb_size=train_mb_size, + train_epochs=train_epochs, + eval_mb_size=eval_mb_size, device=device, plugins=[GenerativeReplayPlugin()]) rp = GenerativeReplayPlugin(generator_strategy=self.generator_strategy) diff --git a/examples/generative_replay_MNIST_generator.py b/examples/generative_replay_MNIST_generator.py index a90f08de1..c22df5a96 100644 --- a/examples/generative_replay_MNIST_generator.py +++ b/examples/generative_replay_MNIST_generator.py @@ -3,8 +3,8 @@ # Copyrights licensed under the MIT License. # # See the accompanying LICENSE file for terms. # # # -# Date: 12-10-2020 # -# Author(s): Vincenzo Lomonaco # +# Date: 01-04-2022 # +# Author(s): Florian Mies # # E-mail: contact@continualai.org # # Website: avalanche.continualai.org # ################################################################################ @@ -39,31 +39,13 @@ def main(args): if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) - n_batches = 5 - # --------- - - # --- TRANSFORMATIONS - train_transform = transforms.Compose( - [ - RandomCrop(28, padding=4), - ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)), - ] - ) - test_transform = transforms.Compose( - [ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] - ) - # --------- # --- SCENARIO CREATION scenario = SplitMNIST(n_experiences=10, seed=1234) # --------- # MODEL CREATION - model = MlpVAE((1, 28, 28), nhid=2) - - # choose some metrics and evaluation method - interactive_logger = InteractiveLogger() + model = MlpVAE((1, 28, 28), nhid=2, device=device) # CREATE THE STRATEGY INSTANCE (GenerativeReplay) cl_strategy = VAETraining( diff --git a/examples/generative_replay_splitMNIST.py b/examples/generative_replay_splitMNIST.py index 1467ae3e4..05c8cf62c 100644 --- a/examples/generative_replay_splitMNIST.py +++ b/examples/generative_replay_splitMNIST.py @@ -3,8 +3,8 @@ # Copyrights licensed under the MIT License. # # See the accompanying LICENSE file for terms. 
# # # -# Date: 12-10-2020 # -# Author(s): Vincenzo Lomonaco # +# Date: 01-04-2022 # +# Author(s): Florian Mies # # E-mail: contact@continualai.org # # Website: avalanche.continualai.org # ################################################################################ @@ -43,21 +43,6 @@ def main(args): if torch.cuda.is_available() and args.cuda >= 0 else "cpu" ) - n_batches = 5 - # --------- - - # --- TRANSFORMATIONS - train_transform = transforms.Compose( - [ - RandomCrop(28, padding=4), - ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)), - ] - ) - test_transform = transforms.Compose( - [ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] - ) - # --------- # --- SCENARIO CREATION scenario = SplitMNIST(n_experiences=10, seed=1234) From 1a8becf3b024aca1e9ce5d2bab44e8192706acc3 Mon Sep 17 00:00:00 2001 From: florian Date: Fri, 1 Apr 2022 14:49:13 +0200 Subject: [PATCH 49/50] Pass new arguments from strategy to plugin. --- avalanche/training/supervised/strategy_wrappers.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 6d4a816f1..9417a3ce9 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -313,6 +313,8 @@ def __init__( evaluator: EvaluationPlugin = default_evaluator, eval_every=-1, generator_strategy: BaseTemplate = None, + replay_size: int = None, + increasing_replay_size: bool = False, **base_kwargs ): """ @@ -363,9 +365,14 @@ def __init__( criterion=VAE_loss, train_mb_size=train_mb_size, train_epochs=train_epochs, eval_mb_size=eval_mb_size, device=device, - plugins=[GenerativeReplayPlugin()]) - - rp = GenerativeReplayPlugin(generator_strategy=self.generator_strategy) + plugins=[GenerativeReplayPlugin( + replay_size=replay_size, + increasing_replay_size=increasing_replay_size)]) + + rp = GenerativeReplayPlugin( + generator_strategy=self.generator_strategy, + replay_size=replay_size, + increasing_replay_size=increasing_replay_size) tgp = TrainGeneratorAfterExpPlugin() From dfa1d690f1bec660455bec335e70aae5390ffa12 Mon Sep 17 00:00:00 2001 From: Antonio Carta Date: Fri, 8 Apr 2022 10:16:56 +0200 Subject: [PATCH 50/50] update multihead test --- tests/training/test_strategies_accuracy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/training/test_strategies_accuracy.py b/tests/training/test_strategies_accuracy.py index e06c1fbe3..c0c93681c 100644 --- a/tests/training/test_strategies_accuracy.py +++ b/tests/training/test_strategies_accuracy.py @@ -74,10 +74,10 @@ def test_multihead_cumulative(self): model, optimizer, criterion, - train_mb_size=32, + train_mb_size=64, device=get_device(), eval_mb_size=512, - train_epochs=3, + train_epochs=6, evaluator=evalp, ) benchmark = get_fast_benchmark(use_task_labels=True)
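Taken together, the series leaves the user-facing API roughly as sketched below. The SimpleMLP solver, the hyperparameter values and the exact GenerativeReplay import path are illustrative assumptions rather than something these patches prescribe; replay_size and increasing_replay_size are the options introduced in PATCH 48/49.

    import torch
    from torch.nn import CrossEntropyLoss
    from torch.optim import Adam

    from avalanche.benchmarks import SplitMNIST
    from avalanche.models import SimpleMLP
    from avalanche.training.supervised import GenerativeReplay

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    scenario = SplitMNIST(n_experiences=10, seed=1234)

    model = SimpleMLP(num_classes=10)
    cl_strategy = GenerativeReplay(
        model,
        Adam(model.parameters(), lr=0.001),
        CrossEntropyLoss(),
        train_mb_size=100,
        train_epochs=4,
        eval_mb_size=100,
        device=device,
        # Append a fixed number of replay samples to every minibatch ...
        replay_size=100,
        # ... or let the replay share grow with the number of seen experiences.
        increasing_replay_size=False,
    )

    for experience in scenario.train_stream:
        cl_strategy.train(experience)
        cl_strategy.eval(scenario.test_stream)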