adding delta theta update in comment
AvivSham committed Feb 8, 2021
1 parent ee8a875 commit df4367c
Showing 129 changed files with 483 additions and 4,366 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -129,4 +129,5 @@ dmypy.json
 .pyre/

 data/
-.idea/
+.idea/
+wandb/
7 changes: 1 addition & 6 deletions experiments/hetro/dataset.py
@@ -11,12 +11,7 @@

 def get_datasets(data_name, dataroot, normalize=True, val_size=10000):

-    if data_name == 'mnist':
-        normalization = transforms.Normalize(
-            (0.1307,), (0.3081,)
-        )
-        data_obj = MNIST
-    elif data_name =='cifar10':
+    if data_name =='cifar10':
         normalization = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
         data_obj = CIFAR10
     elif data_name == 'cifar100':
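The hunk above only shows the Normalize statistics kept for CIFAR-10. As an illustration of how such a normalization is typically used, the sketch below composes it into a torchvision transform and passes it to the CIFAR10 constructor; the Compose pipeline and dataset call are assumptions for illustration, not the rest of get_datasets.

from torchvision import transforms
from torchvision.datasets import CIFAR10

# Illustrative only: the usual way a Normalize like the one above is combined
# into a transform pipeline and handed to the torchvision dataset.
normalization = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
transform = transforms.Compose([transforms.ToTensor(), normalization])
train_set = CIFAR10(root="data", train=True, download=True, transform=transform)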
104 changes: 104 additions & 0 deletions experiments/hetro/local_layers/models.py
@@ -0,0 +1,104 @@
import torch.nn.functional as F
from torch import nn
from torch.nn.utils import spectral_norm


# Per-node (personal) head: an optional ReLU followed by a single Linear layer.
class LocalLayer(nn.Module):

    def __init__(self, n_input=84, n_output=2, nonlinearity=False):
        super().__init__()
        self.nonlinearity = nonlinearity
        layers = []
        if nonlinearity:
            layers.append(nn.ReLU())

        layers.append(nn.Linear(n_input, n_output))
        self.layer = nn.Sequential(*layers)

    def forward(self, x):
        return self.layer(x)


# Hypernetwork: maps a learned per-node embedding to the weights of CNNTarget.
class CNNHyper(nn.Module):
    def __init__(
            self, n_nodes, embedding_dim, in_channels=3, out_dim=10, n_kernels=16, hidden_dim=100,
            spec_norm=False, n_hidden=1):
        super().__init__()

        self.in_channels = in_channels
        self.out_dim = out_dim
        self.n_kernels = n_kernels
        self.embeddings = nn.Embedding(num_embeddings=n_nodes, embedding_dim=embedding_dim)

        layers = [
            spectral_norm(nn.Linear(embedding_dim, hidden_dim)) if spec_norm else nn.Linear(embedding_dim, hidden_dim),
        ]
        for _ in range(n_hidden):
            layers.append(nn.ReLU(inplace=True))
            layers.append(
                spectral_norm(nn.Linear(hidden_dim, hidden_dim)) if spec_norm else nn.Linear(hidden_dim, hidden_dim),
            )

        self.mlp = nn.Sequential(*layers)

        # One linear head per target-network parameter tensor.
        self.c1_weights = nn.Linear(hidden_dim, self.n_kernels * self.in_channels * 5 * 5)
        self.c1_bias = nn.Linear(hidden_dim, self.n_kernels)
        self.c2_weights = nn.Linear(hidden_dim, 2 * self.n_kernels * self.n_kernels * 5 * 5)
        self.c2_bias = nn.Linear(hidden_dim, 2 * self.n_kernels)
        self.l1_weights = nn.Linear(hidden_dim, 120 * 2 * self.n_kernels * 5 * 5)
        self.l1_bias = nn.Linear(hidden_dim, 120)
        self.l2_weights = nn.Linear(hidden_dim, 84 * 120)
        self.l2_bias = nn.Linear(hidden_dim, 84)
        # self.l3_weights = nn.Linear(hidden_dim, self.out_dim * 84)
        # self.l3_bias = nn.Linear(hidden_dim, self.out_dim)

        if spec_norm:
            self.c1_weights = spectral_norm(self.c1_weights)
            self.c1_bias = spectral_norm(self.c1_bias)
            self.c2_weights = spectral_norm(self.c2_weights)
            self.c2_bias = spectral_norm(self.c2_bias)
            self.l1_weights = spectral_norm(self.l1_weights)
            self.l1_bias = spectral_norm(self.l1_bias)
            self.l2_weights = spectral_norm(self.l2_weights)
            self.l2_bias = spectral_norm(self.l2_bias)
            # self.l3_weights = spectral_norm(self.l3_weights)
            # self.l3_bias = spectral_norm(self.l3_bias)

    def forward(self, idx):
        emd = self.embeddings(idx)
        features = self.mlp(emd)

        # Weight tensors keyed to match CNNTarget's state_dict (fc3 head omitted).
        weights = {
            "conv1.weight": self.c1_weights(features).view(self.n_kernels, self.in_channels, 5, 5),
            "conv1.bias": self.c1_bias(features).view(-1),
            "conv2.weight": self.c2_weights(features).view(2 * self.n_kernels, self.n_kernels, 5, 5),
            "conv2.bias": self.c2_bias(features).view(-1),
            "fc1.weight": self.l1_weights(features).view(120, 2 * self.n_kernels * 5 * 5),
            "fc1.bias": self.l1_bias(features).view(-1),
            "fc2.weight": self.l2_weights(features).view(84, 120),
            "fc2.bias": self.l2_bias(features).view(-1),
            # "fc3.weight": self.l3_weights(features).view(self.out_dim, 84),
            # "fc3.bias": self.l3_bias(features).view(-1),
        }
        return weights


# LeNet-style target network; fc3 is commented out, so forward returns 84-dim features
# that a per-node LocalLayer can map to logits.
class CNNTarget(nn.Module):
    def __init__(self, in_channels=3, n_kernels=16, out_dim=10):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels, n_kernels, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(n_kernels, 2 * n_kernels, 5)
        self.fc1 = nn.Linear(2 * n_kernels * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        # self.fc3 = nn.Linear(84, out_dim)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # x = self.fc3(x)
        return x
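The three modules above fit together as hypernetwork, shared target network, and per-node head. Below is a minimal sketch of that wiring, assuming the generated weights are loaded into CNNTarget with load_state_dict and a LocalLayer acts as the per-node classifier; the sizes, node id, and batch are illustrative, and the real training loop presumably updates the hypernetwork through the delta-theta rule the commit message mentions rather than through this load_state_dict call.

import torch

from experiments.hetro.local_layers.models import CNNHyper, CNNTarget, LocalLayer

# Illustrative sizes; the actual experiment scripts may use different values.
hnet = CNNHyper(n_nodes=50, embedding_dim=13)
net = CNNTarget()
local_head = LocalLayer(n_input=84, n_output=2)

node_id = torch.tensor([3], dtype=torch.long)   # which client to generate weights for
weights = hnet(node_id)                         # dict keyed like CNNTarget's state_dict
net.load_state_dict(weights)                    # copies tensors into the target net (detached
                                                # from the hypernetwork's autograd graph)

x = torch.randn(8, 3, 32, 32)                   # a CIFAR-sized batch
features = net(x)                               # shared body outputs 84-dim features
logits = local_head(features)                   # per-node head produces the logits
print(logits.shape)                             # torch.Size([8, 2])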
41 changes: 41 additions & 0 deletions experiments/hetro/local_layers/node.py
@@ -0,0 +1,41 @@
from experiments.hetro.dataset import gen_random_loaders


# Holds, for every node, its data loaders plus a personal (local) layer and its optimizer.
class BaseNodesForLocal:
    def __init__(
            self,
            data_name,
            data_path,
            n_nodes,
            base_layer,
            layer_config,
            base_optimizer,
            optimizer_config,
            device,
            batch_size=128,
            classes_per_node=2,
    ):

        self.data_name = data_name
        self.data_path = data_path
        self.n_nodes = n_nodes
        self.classes_per_node = classes_per_node
        self.batch_size = batch_size

        # One personal layer and one optimizer per node.
        self.local_layers = [
            base_layer(**layer_config).to(device) for _ in range(self.n_nodes)
        ]
        self.local_optimizers = [
            base_optimizer(self.local_layers[i].parameters(), **optimizer_config) for i in range(self.n_nodes)
        ]

        # Per-node train/val/test loaders generated from the chosen dataset.
        self.train_loaders, self.val_loaders, self.test_loaders = gen_random_loaders(
            self.data_name,
            self.data_path,
            self.n_nodes,
            self.batch_size,
            self.classes_per_node
        )

    def __len__(self):
        return self.n_nodes
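A minimal usage sketch for BaseNodesForLocal, assuming LocalLayer as the base layer and torch.optim.SGD as the optimizer; every configuration value below is an illustrative assumption, not the repository's actual experiment settings.

import torch

from experiments.hetro.local_layers.models import LocalLayer
from experiments.hetro.local_layers.node import BaseNodesForLocal

nodes = BaseNodesForLocal(
    data_name="cifar10",
    data_path="data",
    n_nodes=50,
    base_layer=LocalLayer,
    layer_config={"n_input": 84, "n_output": 2},
    base_optimizer=torch.optim.SGD,
    optimizer_config={"lr": 5e-3, "momentum": 0.9},
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    batch_size=128,
    classes_per_node=2,
)

print(len(nodes))                            # number of federated nodes
batch = next(iter(nodes.train_loaders[0]))   # first client's training data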
