diff --git a/l2gv2/embedding/__init__.py b/l2gv2/embedding/__init__.py new file mode 100644 index 0000000..304f47d --- /dev/null +++ b/l2gv2/embedding/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + diff --git a/l2gv2/embedding/dgi/LICENSE b/l2gv2/embedding/dgi/LICENSE new file mode 100644 index 0000000..8918d22 --- /dev/null +++ b/l2gv2/embedding/dgi/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Petar Veličković, 2021 Lucas G. S. Jeub + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/l2gv2/embedding/dgi/README.md b/l2gv2/embedding/dgi/README.md new file mode 100644 index 0000000..e9292ca --- /dev/null +++ b/l2gv2/embedding/dgi/README.md @@ -0,0 +1,30 @@ +# DGI + +This code is adapted from the reference implementation of Deep Graph Infomax (Veličković *et al.*, ICLR 2019): [https://arxiv.org/abs/1809.10341](https://arxiv.org/abs/1809.10341) + +![](https://camo.githubusercontent.com/f62a0b987d8a1a140a9f3ba14baf4caa45dfbcad/68747470733a2f2f7777772e64726f70626f782e636f6d2f732f757a783779677761637a76747031302f646565705f67726170685f696e666f6d61782e706e673f7261773d31) + +The original reference implementation is available at https://github.com/PetarV-/DGI + +## Overview +Here we provide an implementation of Deep Graph Infomax (DGI) in PyTorch that works with data in [pytorch-geometric](https://github.com/rusty1s/pytorch_geometric) edge-index format. The repository is organised as follows: +- `models/` contains the implementation of the DGI pipeline (`dgi.py`); +- `layers/` contains the implementation of a GCN layer (`gcn.py`), the averaging readout (`readout.py`), and the bilinear discriminator (`discriminator.py`); +- `utils/` contains the loss function for training (`loss.py`). + +## Reference +If you make advantage of DGI in your research, please cite the following in your manuscript: + +``` +@inproceedings{ +velickovic2018deep, +title="{Deep Graph Infomax}", +author={Petar Veli{\v{c}}kovi{\'{c}} and William Fedus and William L. Hamilton and Pietro Li{\`{o}} and Yoshua Bengio and R Devon Hjelm}, +booktitle={International Conference on Learning Representations}, +year={2019}, +url={https://openreview.net/forum?id=rklz9iAcKQ}, +} +``` + +## License +MIT diff --git a/l2gv2/embedding/dgi/__init__.py b/l2gv2/embedding/dgi/__init__.py new file mode 100644 index 0000000..c008619 --- /dev/null +++ b/l2gv2/embedding/dgi/__init__.py @@ -0,0 +1,2 @@ +from .models import DGI +from .utils import DGILoss diff --git a/l2gv2/embedding/dgi/execute.py b/l2gv2/embedding/dgi/execute.py new file mode 100644 index 0000000..cc616f4 --- /dev/null +++ b/l2gv2/embedding/dgi/execute.py @@ -0,0 +1,135 @@ +import torch +import torch.nn as nn +import torch_geometric as tg +import argparse + + +from .models import DGI, LogReg +from .utils.loss import DGILoss + + +parser = argparse.ArgumentParser(description="DGI test script") +parser.add_argument('--datapath', default='/tmp/cora') +args = parser.parse_args() + +dataset = 'cora' +device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') + +loss_fun = DGILoss() + +# training params +batch_size = 1 +nb_epochs = 10000 +patience = 20 +lr = 0.001 +l2_coef = 0.0 +drop_prob = 0.0 +hid_units = 512 +sparse = True +nonlinearity = 'prelu' # special name to separate parameters + +data = tg.datasets.Planetoid(name='Cora', root=args.datapath)[0] +data = data.to(device) +r_sum = data.x.sum(dim=1) +r_sum[r_sum == 0] = 1.0 # avoid division by zero +data.x /= r_sum[:, None] + +# adj, features, labels, idx_train, idx_val, idx_test = process.load_data(dataset) +# features, _ = process.preprocess_features(features) + +nb_nodes = data.num_nodes +ft_size = data.num_features +nb_classes = data.y.max().item() + 1 + +# adj = process.normalize_adj(adj + sp.eye(adj.shape[0])) + +# if sparse: +# sp_adj = process.sparse_mx_to_torch_sparse_tensor(adj) +# else: +# adj = (adj + sp.eye(adj.shape[0])).todense() + +# features = torch.FloatTensor(features[np.newaxis]) +# if not sparse: +# adj = torch.FloatTensor(adj[np.newaxis]) +# labels = torch.FloatTensor(labels[np.newaxis]) +# idx_train = torch.LongTensor(idx_train) +# idx_val = torch.LongTensor(idx_val) +# idx_test = torch.LongTensor(idx_test) + +model = DGI(ft_size, hid_units, nonlinearity) +model = model.to(device) + +optimiser = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2_coef) + +xent = nn.CrossEntropyLoss() +cnt_wait = 0 +best = 1e9 +best_t = 0 + +for epoch in range(nb_epochs): + model.train() + optimiser.zero_grad() + loss = loss_fun(model, data) + + print('Loss:', loss) + + if loss < best: + best = loss + best_t = epoch + cnt_wait = 0 + torch.save(model.state_dict(), 'best_dgi.pkl') + else: + cnt_wait += 1 + + if cnt_wait == patience: + print('Early stopping!') + break + + loss.backward() + optimiser.step() + +print('Loading {}th epoch'.format(best_t)) +model.load_state_dict(torch.load('best_dgi.pkl')) + +embeds = model.embed(data) +train_embs = embeds[data.train_mask] +val_embs = embeds[data.val_mask] +test_embs = embeds[data.test_mask] +# +train_lbls = data.y[data.train_mask] +val_lbls = data.y[data.val_mask] +test_lbls = data.y[data.test_mask] + +tot = torch.zeros(1, device=device) +accs = [] + +for _ in range(50): + log = LogReg(hid_units, nb_classes).to(device) + + opt = torch.optim.Adam(log.parameters(), lr=0.01, weight_decay=0.0) + + pat_steps = 0 + best_acc = torch.zeros(1, device=device) + + for _ in range(100): + log.train() + opt.zero_grad() + + logits = log(train_embs) + loss = xent(logits, train_lbls) + + loss.backward() + opt.step() + + logits = log(test_embs) + preds = torch.argmax(logits, dim=1) + acc = torch.sum(preds == test_lbls).float() / test_lbls.shape[0] + accs.append(acc * 100) + print(acc) + tot += acc + +print('Average accuracy:', tot / 50) + +accs = torch.stack(accs) +print(accs.mean()) +print(accs.std()) diff --git a/l2gv2/embedding/dgi/layers/__init__.py b/l2gv2/embedding/dgi/layers/__init__.py new file mode 100644 index 0000000..617140e --- /dev/null +++ b/l2gv2/embedding/dgi/layers/__init__.py @@ -0,0 +1,3 @@ +from .gcn import GCN +from .readout import AvgReadout +from .discriminator import Discriminator diff --git a/l2gv2/embedding/dgi/layers/discriminator.py b/l2gv2/embedding/dgi/layers/discriminator.py new file mode 100644 index 0000000..d95bbbe --- /dev/null +++ b/l2gv2/embedding/dgi/layers/discriminator.py @@ -0,0 +1,33 @@ +import torch +import torch.nn as nn + + +class Discriminator(nn.Module): + def __init__(self, n_h): + super(Discriminator, self).__init__() + self.f_k = nn.Bilinear(n_h, n_h, 1) + self.reset_parameters() + + def reset_parameters(self): + for m in self.modules(): + if isinstance(m, nn.Bilinear): + torch.nn.init.xavier_uniform_(m.weight.data) + if m.bias is not None: + m.bias.data.fill_(0.0) + + def forward(self, c, h_pl, h_mi, s_bias1=None, s_bias2=None): + c_x = torch.unsqueeze(c, 0) + c_x = c_x.expand_as(h_pl) + + sc_1 = torch.squeeze(self.f_k(h_pl, c_x), 1) + sc_2 = torch.squeeze(self.f_k(h_mi, c_x), 1) + + if s_bias1 is not None: + sc_1 += s_bias1 + if s_bias2 is not None: + sc_2 += s_bias2 + + logits = torch.cat((sc_1, sc_2), 0) + + return logits + diff --git a/l2gv2/embedding/dgi/layers/gcn.py b/l2gv2/embedding/dgi/layers/gcn.py new file mode 100644 index 0000000..2329674 --- /dev/null +++ b/l2gv2/embedding/dgi/layers/gcn.py @@ -0,0 +1,24 @@ +import torch.nn as nn +import torch_geometric.nn as tg_nn + + +class GCN(nn.Module): + def __init__(self, in_ft, out_ft, act, bias=True): + super(GCN, self).__init__() + self.conv = tg_nn.GCNConv(in_channels=in_ft, out_channels=out_ft, bias=bias) + self.act = nn.PReLU() if act == 'prelu' else act + self.reset_parameters() + + def reset_parameters(self): + self.conv.reset_parameters() + if hasattr(self.act, 'reset_parameters'): + self.act.reset_parameters() + elif isinstance(self.act, nn.PReLU): + self.act.weight.data.fill_(0.25) + + # Shape of seq: (batch, nodes, features) + def forward(self, seq, adj): + out = self.conv(seq, adj) + + return self.act(out) + diff --git a/l2gv2/embedding/dgi/layers/readout.py b/l2gv2/embedding/dgi/layers/readout.py new file mode 100644 index 0000000..9d41305 --- /dev/null +++ b/l2gv2/embedding/dgi/layers/readout.py @@ -0,0 +1,17 @@ +import torch +import torch.nn as nn + + +# Applies an average on seq, of shape (batch, nodes, features) +# While taking into account the masking of msk +class AvgReadout(nn.Module): + def __init__(self): + super(AvgReadout, self).__init__() + + def forward(self, seq, msk): + if msk is None: + return torch.mean(seq, 0) + else: + msk = torch.unsqueeze(msk, -1) + return torch.sum(seq * msk, 0) / torch.sum(msk) + diff --git a/l2gv2/embedding/dgi/models/__init__.py b/l2gv2/embedding/dgi/models/__init__.py new file mode 100644 index 0000000..085fa8b --- /dev/null +++ b/l2gv2/embedding/dgi/models/__init__.py @@ -0,0 +1,2 @@ +from .dgi import DGI +from .logreg import LogReg diff --git a/l2gv2/embedding/dgi/models/dgi.py b/l2gv2/embedding/dgi/models/dgi.py new file mode 100644 index 0000000..f6dfaf8 --- /dev/null +++ b/l2gv2/embedding/dgi/models/dgi.py @@ -0,0 +1,37 @@ +import torch.nn as nn +from ..layers import GCN, AvgReadout, Discriminator + + +class DGI(nn.Module): + def __init__(self, n_in, n_h, activation='prelu'): + super(DGI, self).__init__() + self.gcn = GCN(n_in, n_h, activation) + self.read = AvgReadout() + + self.sigm = nn.Sigmoid() + + self.disc = Discriminator(n_h) + + def reset_parameters(self): + for m in self.children(): + if hasattr(m, 'reset_parameters'): + m.reset_parameters() + + def forward(self, seq1, seq2, adj, msk, samp_bias1, samp_bias2): + h_1 = self.gcn(seq1, adj) + + c = self.read(h_1, msk) + c = self.sigm(c) + + h_2 = self.gcn(seq2, adj) + + ret = self.disc(c, h_1, h_2, samp_bias1, samp_bias2) + + return ret + + # Detach the return variables + def embed(self, data, msk=None): + h_1 = self.gcn(data.x, data.edge_index) + + return h_1.detach() + diff --git a/l2gv2/embedding/dgi/models/logreg.py b/l2gv2/embedding/dgi/models/logreg.py new file mode 100644 index 0000000..bb22381 --- /dev/null +++ b/l2gv2/embedding/dgi/models/logreg.py @@ -0,0 +1,22 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +class LogReg(nn.Module): + def __init__(self, ft_in, nb_classes): + super(LogReg, self).__init__() + self.fc = nn.Linear(ft_in, nb_classes) + + for m in self.modules(): + self.weights_init(m) + + def weights_init(self, m): + if isinstance(m, nn.Linear): + torch.nn.init.xavier_uniform_(m.weight.data) + if m.bias is not None: + m.bias.data.fill_(0.0) + + def forward(self, seq): + ret = self.fc(seq) + return ret + diff --git a/l2gv2/embedding/dgi/utils/__init__.py b/l2gv2/embedding/dgi/utils/__init__.py new file mode 100644 index 0000000..381805e --- /dev/null +++ b/l2gv2/embedding/dgi/utils/__init__.py @@ -0,0 +1 @@ +from .loss import DGILoss diff --git a/l2gv2/embedding/dgi/utils/loss.py b/l2gv2/embedding/dgi/utils/loss.py new file mode 100644 index 0000000..7f5c1d9 --- /dev/null +++ b/l2gv2/embedding/dgi/utils/loss.py @@ -0,0 +1,23 @@ +import torch_geometric as tg +import torch + + +class DGILoss(torch.nn.Module): + def __init__(self): + super().__init__() + self.loss_fun = torch.nn.BCEWithLogitsLoss() + + def forward(self, model, data: tg.data.Data): + device = data.edge_index.device + nb_nodes = data.num_nodes + idx = torch.randperm(nb_nodes, device=device) + + shuf_fts = data.x[idx, :] + + lbl_1 = torch.ones(nb_nodes, device=device) + lbl_2 = torch.zeros(nb_nodes, device=device) + lbl = torch.cat((lbl_1, lbl_2), 0) + + logits = model(data.x, shuf_fts, data.edge_index, None, None, None) + + return self.loss_fun(logits, lbl) diff --git a/l2gv2/embedding/eval.py b/l2gv2/embedding/eval.py new file mode 100644 index 0000000..9cb774f --- /dev/null +++ b/l2gv2/embedding/eval.py @@ -0,0 +1,78 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +import numpy as np +from scipy.special import expit +import torch +from sklearn.metrics import roc_auc_score +from local2global_embedding.network import Graph + + +def reconstruction_auc(coordinates, graph: Graph, dist=False, max_samples=int(1e6)): + """ + Compute the network reconstruction auc score + + Args: + coordinates (torch.tensor): embedding to evaluate + graph: network data + dist: if ``True``, use distance decoder to evaluate embedding, otherwise use inner-product decoder + (default: ``False``) + max_samples: maximum number of edges to use for evaluation. If graph has less than ``max_samples`` + edges, all edges are used as positive examples, + otherwise, max_samples edges are sampled with replacement. In both cases, the number of negative + samples is the same as positive samples. + + Returns: + ROC-AUC for correctly classifying true edges versus non-edges + + By default the function samples the same number of non-edges as there are true edges, such that a score of 0.5 + corresponds to random classification. + + """ + if isinstance(coordinates, torch.Tensor): + coordinates = coordinates.cpu().numpy() + + if graph.num_edges > max_samples: + pos_edges = graph.sample_positive_edges(max_samples) + num_samples = max_samples + else: + pos_edges = graph.edge_index + num_samples = graph.num_edges + neg_edges = graph.sample_negative_edges(num_samples) + + if isinstance(pos_edges, torch.Tensor): + pos_edges = pos_edges.cpu().numpy() + + if isinstance(neg_edges, torch.Tensor): + neg_edges = neg_edges.cpu().numpy() + + pos_edges = np.asanyarray(pos_edges) + neg_edges = np.asanyarray(neg_edges) + coordinates = np.asanyarray(coordinates) + if dist: + z = np.concatenate((np.linalg.norm(coordinates[pos_edges[0]]-coordinates[pos_edges[1]], axis=1), + np.linalg.norm(coordinates[neg_edges[0]]-coordinates[neg_edges[1]], axis=1))) + + z = np.exp(-z) + else: + z = np.concatenate((np.sum(coordinates[pos_edges[0]] * coordinates[pos_edges[1]], axis=1), + np.sum(coordinates[neg_edges[0]] * coordinates[neg_edges[1]], axis=1))) + z = expit(z) + y = np.concatenate((np.ones(pos_edges.shape[1]), np.zeros(neg_edges.shape[1]))) + return roc_auc_score(y, z) diff --git a/l2gv2/embedding/gae/__init__.py b/l2gv2/embedding/gae/__init__.py new file mode 100644 index 0000000..964601b --- /dev/null +++ b/l2gv2/embedding/gae/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +from .models import VGAE, GAE +from .utils.loss import VGAE_loss, GAE_loss diff --git a/l2gv2/embedding/gae/layers/GAEconv.py b/l2gv2/embedding/gae/layers/GAEconv.py new file mode 100644 index 0000000..1e64bae --- /dev/null +++ b/l2gv2/embedding/gae/layers/GAEconv.py @@ -0,0 +1,53 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import torch +import torch_geometric as tg +from torch.nn import functional as F + + +class GAEconv(torch.nn.Module): + """ + implements the convolution operator for use with :class:`tg.nn.GAE` + """ + def __init__(self, dim, num_node_features, hidden_dim=32, cached=True, bias=True, add_self_loops=True, normalize=True): + """ + Initialise parameters + + Args: + dim: output dimension + num_node_features: input dimension + hidden_dim: hidden dimension + cached: if ``True``, cache the normalised adjacency matrix after first call + bias: if ``True``, include bias terms + add_self_loops: if ``True``, add self loops before normalising + normalize: if ``True``, normalise adjacency matrix + """ + super().__init__() + self.conv1 = tg.nn.GCNConv(num_node_features, hidden_dim, cached=cached, bias=bias, add_self_loops=add_self_loops, + normalize=normalize) + self.conv2 = tg.nn.GCNConv(hidden_dim, dim, cached=cached, bias=bias, add_self_loops=add_self_loops, + normalize=normalize) + + def forward(self, data): + """compute coordinates given data""" + edge_index = data.edge_index + x = F.relu(self.conv1(data.x, edge_index)) + return self.conv2(x, edge_index) \ No newline at end of file diff --git a/l2gv2/embedding/gae/layers/VGAEconv.py b/l2gv2/embedding/gae/layers/VGAEconv.py new file mode 100644 index 0000000..cca5c14 --- /dev/null +++ b/l2gv2/embedding/gae/layers/VGAEconv.py @@ -0,0 +1,57 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import torch +import torch_geometric as tg +from torch.nn import functional as F + + +class VGAEconv(torch.nn.Module): + """ + implements the convolution operator for use with :class:`torch_geometric.nn.VGAE` + """ + def __init__(self, dim, num_node_features, hidden_dim=32, cached=True, bias=True, add_self_loops=True, normalize=True): + super().__init__() + self.conv1 = tg.nn.GCNConv(num_node_features, hidden_dim, cached=cached, bias=bias, add_self_loops=add_self_loops, + normalize=normalize) + self.mean_conv2 = tg.nn.GCNConv(hidden_dim, dim, cached=cached, bias=bias, add_self_loops=add_self_loops, + normalize=normalize) + self.var_conv2 = tg.nn.GCNConv(hidden_dim, dim, cached=cached, bias=bias, add_self_loops=add_self_loops, + normalize=normalize) + + def forward(self, data: tg.data.Data): + """ + compute mean and variance given data + Args: + data: input data + + Returns: + mu, sigma + + """ + x = data.x + edge_index = data.edge_index + + x = self.conv1(x, edge_index) + x = F.relu(x) + + mu = self.mean_conv2(x, edge_index) + sigma = self.var_conv2(x, edge_index) + return mu, sigma \ No newline at end of file diff --git a/l2gv2/embedding/gae/layers/__init__.py b/l2gv2/embedding/gae/layers/__init__.py new file mode 100644 index 0000000..ea0c0cf --- /dev/null +++ b/l2gv2/embedding/gae/layers/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. diff --git a/l2gv2/embedding/gae/layers/decoders.py b/l2gv2/embedding/gae/layers/decoders.py new file mode 100644 index 0000000..deb2eda --- /dev/null +++ b/l2gv2/embedding/gae/layers/decoders.py @@ -0,0 +1,56 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import torch + + +class DistanceDecoder(torch.nn.Module): + """ + implements the distance decoder which predicts the probability of an edge as the exponential of the + negative euclidean distance between nodes + """ + def __init__(self): + super(DistanceDecoder, self).__init__() + self.dist = torch.nn.PairwiseDistance() + + def forward(self, z, edge_index, sigmoid=True): + """ + compute decoder values + + Args: + z: input coordinates + edge_index: edges + sigmoid: if ``True``, return exponential of negative distance, else return negative distance + + """ + value = -self.dist(z[edge_index[0]], z[edge_index[1]]) + return torch.exp(value) if sigmoid else value + + def forward_all(self, z, sigmoid=True): + """ + compute value for all node pairs + + Args: + z: input coordinates + sigmoid: if ``True``, return exponential of negative distance, else return negative distance + + """ + adj = -torch.cdist(z, z) + return torch.exp(adj) if sigmoid else adj \ No newline at end of file diff --git a/l2gv2/embedding/gae/models/__init__.py b/l2gv2/embedding/gae/models/__init__.py new file mode 100644 index 0000000..f9c01b9 --- /dev/null +++ b/l2gv2/embedding/gae/models/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +from .gae import GAE +from .vgae import VGAE \ No newline at end of file diff --git a/l2gv2/embedding/gae/models/gae.py b/l2gv2/embedding/gae/models/gae.py new file mode 100644 index 0000000..e8bc27e --- /dev/null +++ b/l2gv2/embedding/gae/models/gae.py @@ -0,0 +1,46 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import torch_geometric as tg + +from ..utils.mixins import EmbeddingMixin +from ..layers.decoders import DistanceDecoder +from ..layers.GAEconv import GAEconv + + +class GAE(tg.nn.GAE, EmbeddingMixin): + def __init__(self, dim, hidden_dim, num_features, dist=False): + """ + initialise a Graph Auto-Encoder model + + Args: + dim: output dimension + hidden_dim: inner hidden dimension + num_features: number of input features + dist: if ``True`` use distance decoder, otherwise use inner product decoder (default: ``False``) + + Returns: + initialised :class:`tg.nn.GAE` model + """ + if dist: + super().__init__(encoder=GAEconv(dim, num_node_features=num_features, hidden_dim=hidden_dim), + decoder=DistanceDecoder()) + else: + super().__init__(encoder=GAEconv(dim, num_node_features=num_features, hidden_dim=hidden_dim)) \ No newline at end of file diff --git a/l2gv2/embedding/gae/models/vgae.py b/l2gv2/embedding/gae/models/vgae.py new file mode 100644 index 0000000..8c7e0d2 --- /dev/null +++ b/l2gv2/embedding/gae/models/vgae.py @@ -0,0 +1,46 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import torch_geometric as tg + +from ..utils.mixins import EmbeddingMixin +from ..layers.decoders import DistanceDecoder +from ..layers.VGAEconv import VGAEconv + + +class VGAE(tg.nn.VGAE, EmbeddingMixin): + def __init__(self, dim, hidden_dim, num_features, dist=False): + """ + initialise a Variational Graph Auto-Encoder model + + Args: + dim: output dimension + hidden_dim: inner hidden dimension + num_features: number of input features + dist: if ``True`` use distance decoder, otherwise use inner product decoder (default: ``False``) + + Returns: + initialised :class:`tg.nn.VGAE` model + """ + if dist: + super().__init__(encoder=VGAEconv(dim, num_node_features=num_features, hidden_dim=hidden_dim), + decoder=DistanceDecoder()) + else: + super().__init__(encoder=VGAEconv(dim, num_node_features=num_features, hidden_dim=hidden_dim)) \ No newline at end of file diff --git a/l2gv2/embedding/gae/utils/__init__.py b/l2gv2/embedding/gae/utils/__init__.py new file mode 100644 index 0000000..ea0c0cf --- /dev/null +++ b/l2gv2/embedding/gae/utils/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. diff --git a/l2gv2/embedding/gae/utils/loss.py b/l2gv2/embedding/gae/utils/loss.py new file mode 100644 index 0000000..3ac17a1 --- /dev/null +++ b/l2gv2/embedding/gae/utils/loss.py @@ -0,0 +1,46 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +def VGAE_loss(model, data): + """ + loss function for use with :func:`VGAE_model` + + Args: + model: + data: + + Returns: + loss value + """ + return model.recon_loss(model.encode(data), data.edge_index) + model.kl_loss() / data.num_nodes + + +def GAE_loss(model, data): + """ + loss function for use with :func:`GAE_model` + + Args: + model: + data: + + Returns: + reconstruction loss + """ + return model.recon_loss(model.encode(data), data.edge_index) \ No newline at end of file diff --git a/l2gv2/embedding/gae/utils/mixins.py b/l2gv2/embedding/gae/utils/mixins.py new file mode 100644 index 0000000..4686f15 --- /dev/null +++ b/l2gv2/embedding/gae/utils/mixins.py @@ -0,0 +1,44 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import torch + + +class EmbeddingMixin: + def embed(self, data): + """ + Compute embedding for model and data + + Args: + data: network + + Returns: + embedding coords for nodes + + This function switches the model to eval state before computing the embedding and restores the original + training state of the model + + """ + train_state = self.training + self.training = False + with torch.no_grad(): + coords = self.encode(data) + self.training = train_state + return coords \ No newline at end of file diff --git a/l2gv2/embedding/svd.py b/l2gv2/embedding/svd.py new file mode 100644 index 0000000..6810b8e --- /dev/null +++ b/l2gv2/embedding/svd.py @@ -0,0 +1,228 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +import scipy.sparse as ss +import scipy.sparse.linalg as sl +import numpy as np + +from local2global import Patch + + +# modified from scipy.sparse.linalg.svds: +# Copyright (c) 2001-2002 Enthought, Inc. 2003-2019, SciPy Developers. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +def _augmented_orthonormal_cols(x, k): + # extract the shape of the x array + n, m = x.shape + # create the expanded array and copy x into it + y = np.empty((n, m+k), dtype=x.dtype) + y[:, :m] = x + # do some modified gram schmidt to add k random orthonormal vectors + for i in range(k): + # sample a random initial vector + v = np.random.randn(n) + if np.iscomplexobj(x): + v = v + 1j*np.random.randn(n) + # subtract projections onto the existing unit length vectors + for j in range(m+i): + u = y[:, j] + v -= (np.dot(v, u.conj()) / np.dot(u, u.conj())) * u + # normalize v + v /= np.sqrt(np.dot(v, v.conj())) + # add v into the output array + y[:, m+i] = v + # return the expanded array + return y + + +def _augmented_orthonormal_rows(x, k): + return _augmented_orthonormal_cols(x.T, k).T + + +def _svds_laplacian(A, k=6, tol=1e-8, + maxiter=None, random_state=None, maxrestarts=10, verbose=0): + """Compute the largest or smallest k singular values/vectors for a sparse matrix. The order of the singular values is not guaranteed. + + Parameters + ---------- + A : {sparse matrix, LinearOperator} + Array to compute the SVD on, of shape (M, N) + k : int, optional + Number of singular values and vectors to compute. + Must be 1 <= k < min(A.shape). + tol : float, optional + Tolerance for singular values. Zero (default) means machine precision. + + maxiter : int, optional + Maximum number of iterations. + + .. versionadded:: 0.12.0 + + Returns + ------- + u : ndarray, shape=(M, k) + Unitary matrix having left singular vectors as columns. + If `return_singular_vectors` is "vh", this variable is not computed, + and None is returned instead. + s : ndarray, shape=(k,) + The singular values. + vt : ndarray, shape=(k, N) + Unitary matrix having right singular vectors as rows. + + + + Notes + ----- + This is a naive implementation using LOBPCG as an eigensolver + on A.H * A or A * A.H, depending on which one is more efficient. + """ + if maxiter is None: + maxiter = max(k, 20) + rg = np.random.default_rng(random_state) + + d1 = np.asarray(A.sum(axis=1)).flatten() + D1 = ss.diags(d1 ** (-0.5)) + + d2 = np.asarray(A.sum(axis=0)).flatten() + D2 = ss.diags(d2 ** -0.5) + A = D1 @ A @ D2 + n, m = A.shape + + if k <= 0 or k >= min(n, m): + raise ValueError("k must be between 1 and min(A.shape), k=%d" % k) + else: + if n > m: + X_dot = X_matmat = A.dot + XH_dot = XH_mat = A.T.dot + v0 = d2**0.5 + else: + XH_dot = XH_mat = A.dot + X_dot = X_matmat = A.T.dot + v0 = d1**0.5 + + def matvec_XH_X(x): + return XH_dot(X_dot(x)) + + def matmat_XH_X(x): + return XH_mat(X_matmat(x)) + + XH_X = sl.LinearOperator(matvec=matvec_XH_X, dtype=A.dtype, + matmat=matmat_XH_X, + shape=(min(A.shape), min(A.shape))) + + # Get a low rank approximation of the implicitly defined gramian matrix. + # This is not a stable way to approach the problem. + + + X = rg.normal(size=(min(A.shape), k)) + for _ in range(maxrestarts): + eigvals, eigvec, res = sl.lobpcg(XH_X, X, Y=v0[:, None], tol=tol, maxiter=maxiter, + largest=True, retResidualNormsHistory=True, verbosityLevel=verbose) + if res[-1].max() > tol: + X = eigvec + rg.normal(size=eigvec.shape, scale=0.5*tol) + else: + break + + # Gramian matrices have real non-negative eigenvalues. + eigvals = np.maximum(eigvals.real, 0) + + # Use the sophisticated detection of small eigenvalues from pinvh. + t = eigvec.dtype.char.lower() + factor = {'f': 1E3, 'd': 1E6} + cond = factor[t] * np.finfo(t).eps + cutoff = cond * np.max(eigvals) + + # Get a mask indicating which eigenpairs are not degenerately tiny, + # and create the re-ordered array of thresholded singular values. + above_cutoff = (eigvals > cutoff) + nlarge = above_cutoff.sum() + nsmall = k - nlarge + slarge = np.sqrt(eigvals[above_cutoff]) + s = np.zeros_like(eigvals) + s[:nlarge] = slarge + + if n > m: + vlarge = eigvec[:, above_cutoff] + ularge = X_matmat(vlarge) / slarge + vhlarge = vlarge.T + else: + ularge = eigvec[:, above_cutoff] + vhlarge = (X_matmat(ularge) / slarge).T + + u = _augmented_orthonormal_cols(ularge, nsmall) if ularge is not None else None + vh = _augmented_orthonormal_rows(vhlarge, nsmall) if vhlarge is not None else None + + indexes_sorted = np.argsort(s) + s = s[indexes_sorted] + if u is not None: + u = u[:, indexes_sorted] + if vh is not None: + vh = vh[indexes_sorted] + + return D1 @ u, s, vh @ D2 + + +def bipartite_svd_patches(A: ss.spmatrix, dim, verbose=0): + """ + SVD embedding of bipartite network + Args: + A: + dim: + + Returns: + + """ + index1, _ = A.sum(axis=1).nonzero() + _, index2 = A.sum(axis=0).nonzero() + R1 = ss.coo_matrix((np.ones(index1.size), (np.arange(index1.size), index1)), shape=(index1.size, A.shape[0])) + R2 = ss.coo_matrix((np.ones(index2.size), (index2, np.arange(index2.size))), shape=(A.shape[1], index2.size)) + A = R1 @ A @ R2 + + U, s, Vh = _svds_laplacian(A, k=dim, verbose=verbose) + return Patch(index1, U), Patch(index2, Vh.T) + diff --git a/l2gv2/embedding/train.py b/l2gv2/embedding/train.py new file mode 100644 index 0000000..223c295 --- /dev/null +++ b/l2gv2/embedding/train.py @@ -0,0 +1,95 @@ +# Copyright (c) 2021. Lucas G. S. Jeub +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +import tempfile + +import torch + +from local2global_embedding.utils import EarlyStopping + +def lr_grid_search(data, model, loss_fun, validation_loss_fun, lr_grid=(0.1, 0.01, 0.005, 0.001), + num_epochs=10, runs=1, verbose=True): + """ + grid search over learning rate values + + Args: + data: input data + model: model to train + loss_fun: training loss takes model and data as input + validation_loss_fun: function to compute validation loss input: (model, data) + lr_grid: learning rate values to try + num_epochs: number of epochs for training + runs: number of training runs to average over for selecting best learning rate + verbose: if ``True``, output training progress + + Returns: + best learning rate, validation loss for all runs + """ + val_loss = torch.zeros((len(lr_grid), runs)) + val_start = torch.zeros((len(lr_grid), runs)) + for i, lr in enumerate(lr_grid): + for r in range(runs): + model.reset_parameters() + model = train(data, model, loss_fun, num_epochs=num_epochs, lr=lr, verbose=verbose) + val_loss[i, r] = validation_loss_fun(model, data) + model.reset_parameters() + return lr_grid[torch.argmax(torch.mean(val_loss, 1))], val_loss + + +def train(data, model, loss_fun, num_epochs=10000, patience=20, lr=0.01, weight_decay=0.0, verbose=True, + logger=lambda loss: None): + """ + train an embedding model + + Args: + data: network data + model: embedding auto-encoder model + loss_fun: loss function to use with model (takes arguments ``model``, ``data``) + num_epochs: number of training epochs + patience: patience for early stopping + lr: learining rate (default: 0.01) + weight_decay: weight decay for optimizer (default: 0.0) + verbose: if ``True``, display training progress (default: ``True``) + logger: function that receives the training loss as input and is called after each epoch (does nothing by default) + + Returns: + trained model + + This function uses the Adam optimizer for training. + """ + best = float('inf') + cnt_wait = 0 + best_e = 0 + optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay) + with EarlyStopping(patience) as stop: + for e in range(num_epochs): + model.train() + optimizer.zero_grad() + loss = loss_fun(model, data) + f_loss = float(loss) + logger(f_loss) + if verbose: + print(f'epoch {e}: loss={f_loss}') + if stop(f_loss, model): + if verbose: + print(f'Early stopping at epoch {e}') + break + loss.backward() + optimizer.step() + return model