Added embedding folder from the https://github.com/LJeub/Local2Global…

…_embedding repository
OxfordRSE · Nov 11, 2024 · a783b85 · a783b85
1 parent 513373f
commit a783b85
Show file tree

Hide file tree

Showing 28 changed files with 1,199 additions and 0 deletions.
diff --git a/l2gv2/embedding/__init__.py b/l2gv2/embedding/__init__.py
@@ -0,0 +1,20 @@
+#  Copyright (c) 2021. Lucas G. S. Jeub
+#
+#  Permission is hereby granted, free of charge, to any person obtaining a copy
+#  of this software and associated documentation files (the "Software"), to deal
+#  in the Software without restriction, including without limitation the rights
+#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+#  copies of the Software, and to permit persons to whom the Software is
+#  furnished to do so, subject to the following conditions:
+#
+#  The above copyright notice and this permission notice shall be included in all
+#  copies or substantial portions of the Software.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+#  SOFTWARE.
+
diff --git a/l2gv2/embedding/dgi/LICENSE b/l2gv2/embedding/dgi/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2018 Petar Veličković, 2021 Lucas G. S. Jeub
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/l2gv2/embedding/dgi/README.md b/l2gv2/embedding/dgi/README.md
@@ -0,0 +1,30 @@
+# DGI
+
+This code is adapted from the reference implementation of Deep Graph Infomax (Veličković *et al.*, ICLR 2019): [https://arxiv.org/abs/1809.10341](https://arxiv.org/abs/1809.10341)
+
+![](https://camo.githubusercontent.com/f62a0b987d8a1a140a9f3ba14baf4caa45dfbcad/68747470733a2f2f7777772e64726f70626f782e636f6d2f732f757a783779677761637a76747031302f646565705f67726170685f696e666f6d61782e706e673f7261773d31)
+
+The original reference implementation is available at https://github.com/PetarV-/DGI
+
+## Overview
+Here we provide an implementation of Deep Graph Infomax (DGI) in PyTorch that works with data in [pytorch-geometric](https://github.com/rusty1s/pytorch_geometric) edge-index format. The repository is organised as follows:
+- `models/` contains the implementation of the DGI pipeline (`dgi.py`);
+- `layers/` contains the implementation of a GCN layer (`gcn.py`), the averaging readout (`readout.py`), and the bilinear discriminator (`discriminator.py`);
+- `utils/` contains the loss function for training (`loss.py`).
+
+## Reference
+If you make use of DGI in your research, please cite the following in your manuscript:
+
+```
+@inproceedings{
+velickovic2018deep,
+title="{Deep Graph Infomax}",
+author={Petar Veli{\v{c}}kovi{\'{c}} and William Fedus and William L. Hamilton and Pietro Li{\`{o}} and Yoshua Bengio and R Devon Hjelm},
+booktitle={International Conference on Learning Representations},
+year={2019},
+url={https://openreview.net/forum?id=rklz9iAcKQ},
+}
+```
+
+## License
+MIT
diff --git a/l2gv2/embedding/dgi/__init__.py b/l2gv2/embedding/dgi/__init__.py
@@ -0,0 +1,2 @@
+from .models import DGI
+from .utils import DGILoss
diff --git a/l2gv2/embedding/dgi/execute.py b/l2gv2/embedding/dgi/execute.py
@@ -0,0 +1,135 @@
+import torch
+import torch.nn as nn
+import torch_geometric as tg
+import argparse
+
+
+from .models import DGI, LogReg
+from .utils.loss import DGILoss
+
+
+# Command-line interface: the single option is the directory handed to the
+# dataset loader below as its `root`.
+parser = argparse.ArgumentParser(description="DGI test script")
+parser.add_argument('--datapath', default='/tmp/cora')
+args = parser.parse_args()
+
+dataset = 'cora'
+# Run on the GPU when PyTorch can see one, otherwise on the CPU.
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+loss_fun = DGILoss()
+
+# training params
+batch_size = 1
+nb_epochs = 10000  # upper bound on the training loop; early stopping may end it sooner
+patience = 20  # non-improving epochs tolerated before the training loop breaks
+lr = 0.001
+l2_coef = 0.0  # passed to Adam as weight_decay
+drop_prob = 0.0
+hid_units = 512  # embedding width; also the input width of the LogReg probe
+sparse = True
+nonlinearity = 'prelu'  # special name to separate parameters
+
+# Load the first (index 0) graph of the Planetoid 'Cora' dataset and move it to
+# the chosen device.
+data = tg.datasets.Planetoid(name='Cora', root=args.datapath)[0]
+data = data.to(device)
+
+# Row-normalise the features in place: every row of data.x is divided by its sum.
+r_sum = data.x.sum(dim=1)
+r_sum.masked_fill_(r_sum == 0, 1.0)  # avoid division by zero
+data.x.div_(r_sum.unsqueeze(1))
+
+# adj, features, labels, idx_train, idx_val, idx_test = process.load_data(dataset)
+# features, _ = process.preprocess_features(features)
+
+# Sizes derived from the loaded graph.
+nb_nodes = data.num_nodes
+ft_size = data.num_features
+nb_classes = data.y.max().item() + 1  # largest label value plus one
+
+# adj = process.normalize_adj(adj + sp.eye(adj.shape[0]))
+
+# if sparse:
+#     sp_adj = process.sparse_mx_to_torch_sparse_tensor(adj)
+# else:
+#     adj = (adj + sp.eye(adj.shape[0])).todense()
+
+# features = torch.FloatTensor(features[np.newaxis])
+# if not sparse:
+#     adj = torch.FloatTensor(adj[np.newaxis])
+# labels = torch.FloatTensor(labels[np.newaxis])
+# idx_train = torch.LongTensor(idx_train)
+# idx_val = torch.LongTensor(idx_val)
+# idx_test = torch.LongTensor(idx_test)
+
+# Build the DGI encoder on the target device together with its optimiser.
+model = DGI(ft_size, hid_units, nonlinearity).to(device)
+
+optimiser = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2_coef)
+
+xent = nn.CrossEntropyLoss()  # consumed by the classifier evaluation after training
+
+# Early-stopping bookkeeping: lowest loss so far, the epoch it occurred at, and
+# the number of consecutive epochs without improvement.
+best, best_t, cnt_wait = 1e9, 0, 0
+
+for epoch in range(nb_epochs):
+    model.train()
+    optimiser.zero_grad()
+    loss = loss_fun(model, data)
+
+    print('Loss:', loss)
+
+    if loss < best:
+        # The loss was computed with the current weights, so checkpoint them
+        # before the optimiser step below changes them.
+        best, best_t, cnt_wait = loss, epoch, 0
+        torch.save(model.state_dict(), 'best_dgi.pkl')
+    else:
+        cnt_wait += 1
+
+    if cnt_wait == patience:
+        print('Early stopping!')
+        break
+
+    loss.backward()
+    optimiser.step()
+
+# Restore the checkpoint written at the best epoch.
+print(f'Loading {best_t}th epoch')
+model.load_state_dict(torch.load('best_dgi.pkl'))
+
+# Detached node embeddings from the restored model, split with the masks
+# stored on `data`. The validation split is extracted but not used below.
+embeds = model.embed(data)
+train_embs = embeds[data.train_mask]
+val_embs = embeds[data.val_mask]
+test_embs = embeds[data.test_mask]
+
+train_lbls = data.y[data.train_mask]
+val_lbls = data.y[data.val_mask]
+test_lbls = data.y[data.test_mask]
+
+# Evaluation protocol: train `n_runs` independently initialised logistic
+# regression probes for `n_steps` full-batch steps each. Named once so the
+# loop bound and the averaging divisor cannot drift apart.
+n_runs = 50
+n_steps = 100
+
+tot = torch.zeros(1, device=device)
+accs = []
+
+for _ in range(n_runs):
+    log = LogReg(hid_units, nb_classes).to(device)
+    opt = torch.optim.Adam(log.parameters(), lr=0.01, weight_decay=0.0)
+
+    for _ in range(n_steps):
+        log.train()
+        opt.zero_grad()
+
+        logits = log(train_embs)
+        loss = xent(logits, train_lbls)
+
+        loss.backward()
+        opt.step()
+
+    # Scoring the test split needs no gradients, so do not build a graph for it.
+    with torch.no_grad():
+        logits = log(test_embs)
+    preds = torch.argmax(logits, dim=1)
+    acc = torch.sum(preds == test_lbls).float() / test_lbls.shape[0]
+    accs.append(acc * 100)  # stored as a percentage; `tot` accumulates the raw fraction
+    print(acc)
+    tot += acc
+
+print('Average accuracy:', tot / n_runs)
+
+# Mean and standard deviation of the per-run accuracies (in percent).
+accs = torch.stack(accs)
+print(accs.mean())
+print(accs.std())
diff --git a/l2gv2/embedding/dgi/layers/__init__.py b/l2gv2/embedding/dgi/layers/__init__.py
@@ -0,0 +1,3 @@
+from .gcn import GCN
+from .readout import AvgReadout
+from .discriminator import Discriminator
diff --git a/l2gv2/embedding/dgi/layers/discriminator.py b/l2gv2/embedding/dgi/layers/discriminator.py
@@ -0,0 +1,33 @@
+import torch
+import torch.nn as nn
+
+
+class Discriminator(nn.Module):
+    """Bilinear scorer that rates each row of two matrices against one summary vector.
+
+    ``f_k`` is an ``nn.Bilinear(n_h, n_h, 1)`` layer, so every (row, summary)
+    pair yields a single logit.
+    """
+
+    def __init__(self, n_h):
+        super().__init__()
+        self.f_k = nn.Bilinear(n_h, n_h, 1)
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        """Xavier-uniform the weight and zero the bias of every ``nn.Bilinear`` submodule."""
+        bilinear_layers = (m for m in self.modules() if isinstance(m, nn.Bilinear))
+        for layer in bilinear_layers:
+            torch.nn.init.xavier_uniform_(layer.weight.data)
+            if layer.bias is None:
+                continue
+            layer.bias.data.fill_(0.0)
+
+    def forward(self, c, h_pl, h_mi, s_bias1=None, s_bias2=None):
+        """Score ``h_pl`` and ``h_mi`` against ``c`` and concatenate the results.
+
+        ``c`` gains a leading dimension and is expanded to the shape of
+        ``h_pl``; the same expanded tensor is reused for ``h_mi``. The optional
+        ``s_bias1`` / ``s_bias2`` are added in place to the respective scores.
+
+        Returns the ``h_pl`` scores followed by the ``h_mi`` scores, joined
+        along dimension 0.
+        """
+        summary = c.unsqueeze(0).expand_as(h_pl)
+
+        # f_k emits one value per row; squeeze away that trailing unit dimension.
+        pos_scores = self.f_k(h_pl, summary).squeeze(1)
+        neg_scores = self.f_k(h_mi, summary).squeeze(1)
+
+        if s_bias1 is not None:
+            pos_scores += s_bias1
+        if s_bias2 is not None:
+            neg_scores += s_bias2
+
+        return torch.cat((pos_scores, neg_scores), 0)
+
diff --git a/l2gv2/embedding/dgi/layers/gcn.py b/l2gv2/embedding/dgi/layers/gcn.py
@@ -0,0 +1,24 @@
+import torch.nn as nn
+import torch_geometric.nn as tg_nn
+
+
+class GCN(nn.Module):
+    """A ``torch_geometric`` ``GCNConv`` layer followed by an activation.
+
+    ``act`` may be the string ``'prelu'``, which creates a fresh ``nn.PReLU``;
+    any other value is stored unchanged and called on the convolution output
+    in ``forward``.
+    """
+
+    def __init__(self, in_ft, out_ft, act, bias=True):
+        super().__init__()
+        self.conv = tg_nn.GCNConv(in_channels=in_ft, out_channels=out_ft, bias=bias)
+        if act == 'prelu':
+            self.act = nn.PReLU()
+        else:
+            self.act = act
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        """Reset the convolution, then the activation if it has resettable state."""
+        self.conv.reset_parameters()
+        activation = self.act
+        if hasattr(activation, 'reset_parameters'):
+            activation.reset_parameters()
+            return
+        # Fallback for a PReLU that exposes no reset_parameters of its own.
+        if isinstance(activation, nn.PReLU):
+            activation.weight.data.fill_(0.25)
+
+    def forward(self, seq, adj):
+        """Apply the convolution to ``(seq, adj)`` and then the activation.
+
+        Callers in this package pass ``data.x`` as ``seq`` and
+        ``data.edge_index`` as ``adj``.
+        """
+        return self.act(self.conv(seq, adj))
+
diff --git a/l2gv2/embedding/dgi/layers/readout.py b/l2gv2/embedding/dgi/layers/readout.py
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+class AvgReadout(nn.Module):
+    """Average ``seq`` over its first dimension, optionally weighted by ``msk``.
+
+    With ``msk=None`` the result is the plain mean over dimension 0. Otherwise
+    ``msk`` gains a trailing unit dimension, multiplies ``seq`` by
+    broadcasting, and the sum over dimension 0 is divided by the total of the
+    mask.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, seq, msk):
+        if msk is None:
+            return seq.mean(0)
+
+        weights = msk.unsqueeze(-1)
+        return (seq * weights).sum(0) / weights.sum()
+
diff --git a/l2gv2/embedding/dgi/models/__init__.py b/l2gv2/embedding/dgi/models/__init__.py
@@ -0,0 +1,2 @@
+from .dgi import DGI
+from .logreg import LogReg
diff --git a/l2gv2/embedding/dgi/models/dgi.py b/l2gv2/embedding/dgi/models/dgi.py
@@ -0,0 +1,37 @@
+import torch.nn as nn
+from ..layers import GCN, AvgReadout, Discriminator
+
+
+class DGI(nn.Module):
+    """Deep Graph Infomax model: GCN encoder, average readout and bilinear discriminator.
+
+    Args:
+        n_in: input feature width handed to the GCN encoder.
+        n_h: width of the encoder output and of the discriminator.
+        activation: forwarded to ``GCN`` (default ``'prelu'``).
+    """
+
+    def __init__(self, n_in, n_h, activation='prelu'):
+        super().__init__()
+        self.gcn = GCN(n_in, n_h, activation)
+        self.read = AvgReadout()
+
+        self.sigm = nn.Sigmoid()
+
+        self.disc = Discriminator(n_h)
+
+    def reset_parameters(self):
+        """Call ``reset_parameters`` on every direct child module that defines it."""
+        for m in self.children():
+            if hasattr(m, 'reset_parameters'):
+                m.reset_parameters()
+
+    def forward(self, seq1, seq2, adj, msk, samp_bias1, samp_bias2):
+        """Return discriminator logits for ``seq1`` and ``seq2`` on the same graph.
+
+        Both feature inputs are encoded by the shared GCN with ``adj``. The
+        summary vector is the sigmoid of the readout of the ``seq1`` encoding
+        only. The discriminator then scores both encodings against that
+        summary, with ``samp_bias1`` / ``samp_bias2`` passed through to it.
+        """
+        h_1 = self.gcn(seq1, adj)
+
+        c = self.read(h_1, msk)
+        c = self.sigm(c)
+
+        h_2 = self.gcn(seq2, adj)
+
+        ret = self.disc(c, h_1, h_2, samp_bias1, samp_bias2)
+
+        return ret
+
+    def embed(self, data, msk=None):
+        """Return the encoder output for ``data`` as a tensor detached from autograd.
+
+        Reads ``data.x`` and ``data.edge_index``. ``msk`` is accepted for
+        interface compatibility but is not used.
+        """
+        # Imported here because this module only binds `torch.nn`, not `torch`.
+        import torch
+
+        # The result is never back-propagated through, so run the encoder
+        # without recording an autograd graph instead of building one and
+        # discarding it afterwards.
+        with torch.no_grad():
+            h_1 = self.gcn(data.x, data.edge_index)
+
+        return h_1.detach()
+
diff --git a/l2gv2/embedding/dgi/models/logreg.py b/l2gv2/embedding/dgi/models/logreg.py
@@ -0,0 +1,22 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class LogReg(nn.Module):
+    """Logistic-regression head: one linear layer mapping features to class logits."""
+
+    def __init__(self, ft_in, nb_classes):
+        super().__init__()
+        self.fc = nn.Linear(ft_in, nb_classes)
+
+        # modules() yields this module as well as self.fc; weights_init skips
+        # anything that is not an nn.Linear.
+        for module in self.modules():
+            self.weights_init(module)
+
+    def weights_init(self, m):
+        """Xavier-uniform the weight and zero the bias of ``m`` if it is an ``nn.Linear``."""
+        if not isinstance(m, nn.Linear):
+            return
+        torch.nn.init.xavier_uniform_(m.weight.data)
+        if m.bias is not None:
+            m.bias.data.fill_(0.0)
+
+    def forward(self, seq):
+        """Return the linear layer's output for ``seq``."""
+        return self.fc(seq)
+
diff --git a/l2gv2/embedding/dgi/utils/__init__.py b/l2gv2/embedding/dgi/utils/__init__.py
@@ -0,0 +1 @@
+from .loss import DGILoss
diff --git a/l2gv2/embedding/dgi/utils/loss.py b/l2gv2/embedding/dgi/utils/loss.py
@@ -0,0 +1,23 @@
+import torch_geometric as tg
+import torch
+
+
+class DGILoss(torch.nn.Module):
+    """Binary cross-entropy objective for a DGI-style model.
+
+    The model is called with the graph's features and a row-permuted copy of
+    them. Its logits are scored against targets of one for the first
+    ``num_nodes`` entries and zero for the remaining ``num_nodes`` entries.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.loss_fun = torch.nn.BCEWithLogitsLoss()
+
+    def forward(self, model, data: tg.data.Data):
+        """Return the loss of ``model`` on ``data`` for one random feature permutation."""
+        device = data.edge_index.device
+        n = data.num_nodes
+
+        # Corrupted input: the same feature rows in a random order; the edges
+        # passed to the model are unchanged.
+        perm = torch.randperm(n, device=device)
+        corrupted_x = data.x[perm, :]
+
+        logits = model(data.x, corrupted_x, data.edge_index, None, None, None)
+
+        # Targets follow the logit layout: original features first, permuted second.
+        targets = torch.cat(
+            (torch.ones(n, device=device), torch.zeros(n, device=device)), 0
+        )
+
+        return self.loss_fun(logits, targets)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from .models import DGI
		from .utils import DGILoss
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from .dgi import DGI
		from .logreg import LogReg