# Utility.py

import os
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import scipy.sparse as sp
import torch
import torchvision
from torch.utils.data import DataLoader, Subset
from torchvision import transforms, utils

import pickle as pkl
import networkx as nx
from sklearn.metrics import roc_auc_score, average_precision_score


def prepare_dir(model_label):
    """Create the on-disk layout for a run and return the paths inside it.

    Relative to the current working directory this ensures that ``data``,
    ``model`` and ``result`` exist, together with ``model/<model_label>``,
    ``model/<model_label>/trace`` and ``result/<model_label>``. Calling it
    again for the same label is a no-op on disk.

    Args:
        model_label: Name of the run; used as a sub-directory name. A label
            containing path separators creates the intermediate directories.

    Returns:
        A 6-tuple ``(model_path, model_trace_path, model_result_trace_path,
        model_result_final_dir, middle_result_trace_path, model_label_dir)``.
        The ``*_trace_path`` entries are ``str.format`` templates whose
        placeholders are left unfilled for the caller.

    Raises:
        FileExistsError: If one of the required paths exists but is not a
            directory.
    """
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # "if not exists: mkdir" and also creates missing parents.
    for top_level in ('data', 'model', 'result'):
        os.makedirs(top_level, exist_ok=True)

    # label dir (created implicitly as the parent of the trace dir)
    model_label_dir = os.path.join('model', model_label)
    model_path = os.path.join(model_label_dir, 'final_model')

    # trace dir
    model_trace_dir = os.path.join(model_label_dir, 'trace')
    os.makedirs(model_trace_dir, exist_ok=True)
    model_trace_path = os.path.join(model_trace_dir, 'trace_Epoch_{0}_Loss_{1:.4}')

    # result dir
    result_dir = os.path.join('result', model_label)
    os.makedirs(result_dir, exist_ok=True)
    model_result_trace_path = os.path.join(result_dir, 'trace_Epoch_{0}.png')
    model_result_final_dir = result_dir
    middle_result_trace_path = os.path.join(result_dir, 'Mid_trace_Epoch_{0}.png')

    return (model_path, model_trace_path, model_result_trace_path,
            model_result_final_dir, middle_result_trace_path, model_label_dir)


def get_input_white_noise(white_noise_dim, white_noise_var, white_noise_num):
    """Draw zero-mean Gaussian noise with the requested variance.

    Args:
        white_noise_dim: Number of columns (features) per sample.
        white_noise_var: Variance of every entry; the standard-normal draw
            is scaled by its square root.
        white_noise_num: Number of rows (samples).

    Returns:
        A tensor of shape ``(white_noise_num, white_noise_dim)``.
    """
    std = white_noise_var ** 0.5
    unit_noise = torch.randn(white_noise_num, white_noise_dim)
    return unit_noise * std


def get_input_mixture_white_noise(white_noise_dim, white_noise_var, white_noise_num, mixture_num=10):
    """Draw Gaussian noise whose per-sample mean is picked from a small grid.

    Each sample gets one random integer ``k`` in ``[1, mixture_num)``; its
    mean, shared by all of its dimensions, is ``k / (2 * mixture_num) - 1``.
    Gaussian noise with variance ``white_noise_var`` is added on top.

    Args:
        white_noise_dim: Number of columns (features) per sample.
        white_noise_var: Variance of the additive Gaussian component.
        white_noise_num: Number of rows (samples).
        mixture_num: Upper bound (exclusive) of the random integer and, times
            two, the divisor used to build the mean.

    Returns:
        A tensor of shape ``(white_noise_num, white_noise_dim)``.
    """
    # The Gaussian part is drawn first so the RNG stream is consumed in the
    # same order as before.
    gaussian_part = torch.randn(white_noise_num, white_noise_dim) * (white_noise_var ** 0.5)

    component = torch.randint(1, mixture_num, (white_noise_num,))
    centre = torch.div(component, float(mixture_num * 2))
    centre = centre - torch.ones_like(centre)

    # One mean per sample, copied across every feature dimension.
    centre = centre.unsqueeze(-1).repeat(1, white_noise_dim)
    return centre + gaussian_part


def get_input_uniform_noise(uniform_noise_upb, uniform_noise_lowb, uniform_noise_dim, uniform_noise_num):
    """Draw uniform noise with NumPy and wrap it as a torch tensor.

    Note that the upper bound comes *before* the lower bound in the
    signature.

    Args:
        uniform_noise_upb: Upper bound passed to ``np.random.uniform``.
        uniform_noise_lowb: Lower bound passed to ``np.random.uniform``.
        uniform_noise_dim: Number of columns (features) per sample.
        uniform_noise_num: Number of rows (samples).

    Returns:
        A tensor of shape ``(uniform_noise_num, uniform_noise_dim)`` sharing
        memory with the NumPy draw (so it keeps NumPy's dtype).
    """
    shape = [uniform_noise_num, uniform_noise_dim]
    samples = np.random.uniform(low=uniform_noise_lowb, high=uniform_noise_upb, size=shape)
    return torch.from_numpy(samples)

# Adopted from https://github.com/tkipf/gae/blob/master/gae/input_data.py
def get_graph_target(source):
    """Load a citation graph and its node features from ``data/ind.<source>.*``.

    Reads the pickled ``x``, ``tx``, ``allx`` and ``graph`` objects plus the
    ``test.index`` file for the given dataset, stacks ``allx`` and ``tx``
    into one feature matrix, moves the test rows to the positions given by
    the index file, and builds the adjacency matrix from the ``graph``
    dict-of-lists.

    Args:
        source: One of ``'cora'``, ``'citeseer'`` or ``'pubmed'``.

    Returns:
        ``(adj, features)`` where ``adj`` is whatever
        ``networkx.adjacency_matrix`` returns for the loaded graph and
        ``features`` is a dense ``torch.FloatTensor``.

    Raises:
        SystemExit: If ``source`` is not one of the supported names.
    """
    if source not in ['cora', 'citeseer', 'pubmed']:
        raise SystemExit('Error: Unknown source for target: {0}'.format(source))

    # load the data: x, tx, allx, graph
    objects = []
    for name in ('x', 'tx', 'allx', 'graph'):
        with open("data/ind.{}.{}".format(source, name), 'rb') as rf:
            # encoding='latin1' lets Python 3 read numpy arrays that were
            # pickled under Python 2.
            # SECURITY: unpickling executes arbitrary code embedded in the
            # file; only load dataset files from a trusted origin.
            objects.append(pkl.load(rf, encoding='latin1'))
    x, tx, allx, graph = objects

    test_idx_reorder = parse_index_file(
        "data/ind.{}.test.index".format(source))
    test_idx_range = np.sort(test_idx_reorder)

    if source == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(
            min(test_idx_reorder), max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended

    features = sp.vstack((allx, tx)).tolil()
    # Rows were stacked in sorted-index order; permute them so that each
    # test row ends up at the index listed for it in the index file.
    features[test_idx_reorder, :] = features[test_idx_range, :]
    features = torch.FloatTensor(np.array(features.todense()))
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    return adj, features


def parse_index_file(filename):
    """Read a text file holding one integer per line.

    Args:
        filename: Path of the index file.

    Returns:
        The integers as a list, in file order. Lines that are empty or
        contain only whitespace are skipped.

    Raises:
        ValueError: If a non-blank line is not a valid integer.
    """
    # "with" guarantees the handle is closed; the original left it open.
    with open(filename) as index_file:
        return [int(line) for line in index_file if line.strip()]

### The same as https://github.com/tkipf/gae/blob/master/gae/preprocessing.py
def mask_test_edges(adj):
    """Split the edges of a graph into train / validation / test sets.

    The diagonal of ``adj`` is removed, the upper-triangular edges are
    shuffled with NumPy's global RNG, and ``floor(E / 20)`` of them become
    validation edges and ``floor(E / 10)`` test edges (``E`` = number of
    upper-triangular edges). The rest are training edges. For each held-out
    set an equally sized list of random "negative" node pairs is sampled.

    Args:
        adj: Scipy sparse adjacency matrix.
            NOTE(review): presumably symmetric (undirected graph) — the
            negative-sampling checks rely on ``edges_all`` containing both
            directions of every edge; confirm against callers.

    Returns:
        ``(adj_train, train_edges, val_edges, val_edges_false, test_edges,
        test_edges_false)``. ``adj_train`` is a symmetric sparse matrix
        rebuilt from the training edges only; the positive edge arrays hold
        one direction per edge; the ``*_false`` values are Python lists of
        ``[i, j]`` pairs.
    """
    # Function to build test set with 10% positive links
    # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper.
    # TODO: Clean up.

    # Remove diagonal elements
    adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    # Upper triangle only, so each stored edge appears once in `edges`.
    adj_triu = sp.triu(adj)
    adj_tuple = sparse_to_tuple(adj_triu)
    edges = adj_tuple[0]
    # Every stored non-zero of adj (not just the upper triangle).
    edges_all = sparse_to_tuple(adj)[0]
    num_test = int(np.floor(edges.shape[0] / 10.))
    num_val = int(np.floor(edges.shape[0] / 20.))

    # Shuffle edge indices with the global NumPy RNG, then carve off the
    # validation slice first and the test slice right after it.
    all_edge_idx = list(range(edges.shape[0]))
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    train_edges = np.delete(edges, np.hstack([test_edge_idx, val_edge_idx]), axis=0)

    def ismember(a, b, tol=5):
        """Return True if any row of ``a`` equals some row of ``b``.

        Rows are compared by broadcasting ``a`` against every row of ``b``
        and rounding the difference to ``tol`` decimals.
        """
        rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
        return np.any(rows_close)

    # Rejection-sample negative pairs for the test set: no self-loops, not a
    # stored entry of adj, and not already chosen in either orientation.
    test_edges_false = []
    while len(test_edges_false) < len(test_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], edges_all):
            continue
        if test_edges_false:
            if ismember([idx_j, idx_i], np.array(test_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(test_edges_false)):
                continue
        test_edges_false.append([idx_i, idx_j])

    # Same for the validation set, but candidates are only checked against
    # train_edges and val_edges (both orientations).
    # NOTE(review): test_edges are not consulted here, so a held-out test
    # edge can be drawn as a validation negative — confirm this is intended.
    val_edges_false = []
    while len(val_edges_false) < len(val_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], train_edges):
            continue
        if ismember([idx_j, idx_i], train_edges):
            continue
        if ismember([idx_i, idx_j], val_edges):
            continue
        if ismember([idx_j, idx_i], val_edges):
            continue
        if val_edges_false:
            if ismember([idx_j, idx_i], np.array(val_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(val_edges_false)):
                continue
        val_edges_false.append([idx_i, idx_j])

    # Sanity checks on the split. `~` is applied to the np.any() result of
    # ismember, i.e. a NumPy boolean, where it acts as logical negation.
    assert ~ismember(test_edges_false, edges_all)
    assert ~ismember(val_edges_false, edges_all)
    assert ~ismember(val_edges, train_edges)
    assert ~ismember(test_edges, train_edges)
    assert ~ismember(val_edges, test_edges)

    data = np.ones(train_edges.shape[0])

    # Re-build adj matrix
    adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape)
    # train_edges hold one direction only; add the transpose to symmetrize.
    adj_train = adj_train + adj_train.T

    # NOTE: these edge lists only contain single direction of edge!
    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false


def preprocess_graph(adj):
    """Add self-loops to ``adj`` and scale it by inverse-sqrt row sums.

    With ``A~ = adj + I`` and ``D`` the diagonal matrix of the row sums of
    ``A~``, this computes ``(A~ D^-1/2)^T D^-1/2`` and returns it as a torch
    sparse tensor.

    Args:
        adj: Adjacency matrix in any form accepted by ``sp.coo_matrix``.

    Returns:
        The result of ``sparse_mx_to_torch_sparse_tensor`` applied to the
        normalised matrix.
    """
    with_self_loops = sp.coo_matrix(adj)
    with_self_loops = with_self_loops + sp.eye(with_self_loops.shape[0])

    row_totals = np.array(with_self_loops.sum(1))
    inv_sqrt_degree = sp.diags(np.power(row_totals, -0.5).flatten())

    scaled_cols = with_self_loops.dot(inv_sqrt_degree)
    normalised = scaled_cols.transpose().dot(inv_sqrt_degree).tocoo()
    return sparse_mx_to_torch_sparse_tensor(normalised)


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor.

    Args:
        sparse_mx: Any scipy sparse matrix; it is converted to COO layout
            and cast to ``float32`` first.

    Returns:
        A ``float32`` sparse COO tensor with the same shape and non-zero
        entries as ``sparse_mx``.
    """
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    # torch expects a (2, nnz) int64 index tensor: row indices on top,
    # column indices below.
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse_coo_tensor is the supported factory; the legacy
    # torch.sparse.FloatTensor constructor is deprecated.
    return torch.sparse_coo_tensor(indices, values, shape)

###

def pdist(e, squared=False, eps=1e-12):
    """Pairwise Euclidean distances between the rows of ``e``.

    Uses ``|a|^2 + |b|^2 - 2 a.b`` and clamps the result from below at
    ``eps`` before the optional square root; the diagonal is then set to
    exactly zero.

    Args:
        e: 2-D tensor whose rows are the points.
        squared: If true, return squared distances (no square root).
        eps: Lower clamp applied to the squared distances.

    Returns:
        A ``(len(e), len(e))`` tensor of distances with a zero diagonal.
    """
    sq_norms = e.pow(2).sum(dim=1)
    gram = e @ e.t()
    dist = sq_norms.unsqueeze(1) + sq_norms.unsqueeze(0) - 2 * gram
    dist = dist.clamp(min=eps)

    if not squared:
        dist = dist.sqrt()

    # Clone before the in-place write so the diagonal can be zeroed out.
    dist = dist.clone()
    diag = range(len(e))
    dist[diag, diag] = 0
    return dist


def get_roc_score(emb, adj_orig, edges_pos, edges_neg):
    """Score link prediction from node embeddings with ROC-AUC and AP.

    The score of a node pair ``(i, j)`` is ``sigmoid(emb[i] . emb[j])``.
    Pairs in ``edges_pos`` are labelled 1 and pairs in ``edges_neg`` are
    labelled 0.

    Args:
        emb: 2-D array of node embeddings, one row per node.
        adj_orig: Unused; kept so existing call sites keep working.
        edges_pos: Iterable of ``(i, j)`` pairs that are true edges.
        edges_neg: Iterable of ``(i, j)`` pairs that are non-edges.

    Returns:
        ``(roc_score, ap_score)`` as computed by scikit-learn's
        ``roc_auc_score`` and ``average_precision_score``.
    """
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # Predict on test set of edges: inner-product decoder.
    adj_rec = np.dot(emb, emb.T)

    # The original also looked up adj_orig[i, j] for every pair and threw
    # the value away; that per-edge lookup is dropped here.
    preds = [sigmoid(adj_rec[e[0], e[1]]) for e in edges_pos]
    preds_neg = [sigmoid(adj_rec[e[0], e[1]]) for e in edges_neg]

    preds_all = np.hstack([preds, preds_neg])
    labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
    roc_score = roc_auc_score(labels_all, preds_all)
    ap_score = average_precision_score(labels_all, preds_all)

    return roc_score, ap_score


def sparse_to_tuple(sparse_mx):
    """Break a scipy sparse matrix into ``(coords, values, shape)``.

    Args:
        sparse_mx: A scipy sparse matrix; converted to COO layout if it is
            not already in it.

    Returns:
        ``coords`` — an ``(nnz, 2)`` array of ``[row, col]`` pairs,
        ``values`` — the matching stored entries, and
        ``shape`` — the matrix shape.
    """
    coo = sparse_mx if sp.isspmatrix_coo(sparse_mx) else sparse_mx.tocoo()
    row_col = np.vstack((coo.row, coo.col))
    return row_col.transpose(), coo.data, coo.shape


def normalise_adj(adj):
    """Symmetrically normalise a dense adjacency tensor.

    Computes ``D^-1/2 @ adj @ D^-1/2`` where ``D`` is the diagonal matrix of
    the row sums of ``adj``.

    Args:
        adj: Dense square tensor.

    Returns:
        The normalised tensor, same shape as ``adj``.
    """
    row_sums = adj.sum(dim=1)
    d_inv_sqrt = torch.diag(torch.pow(row_sums, -0.5))
    right_scaled = torch.matmul(adj, d_inv_sqrt)
    return torch.matmul(d_inv_sqrt, right_scaled)


def save_data(x, dim, ax, channel, save_dir, padding=0):
    """Show the first 64 images of a batch on ``ax`` and save them to disk.

    The batch is mapped with ``x / 2 + 0.5`` and clamped to ``[0, 1]`` before
    being tiled into a grid.

    Args:
        x: 4-D image batch tensor.
        dim: Unused by the current implementation.
        ax: Matplotlib axes to draw the grid on.
        channel: Unused by the current implementation.
        save_dir: Destination passed to ``utils.save_image``.
        padding: Padding between grid tiles.
    """
    rescaled = x / 2 + 0.5
    examples = torch.clamp(rescaled[:64, :, :, :], 0, 1)
    ax.axis('off')
    grid = utils.make_grid(examples, padding=padding, normalize=True)
    # Grid is channel-first; move channels last for imshow.
    ax.imshow(np.transpose(grid, (1, 2, 0)))
    utils.save_image(examples, save_dir, padding=padding)


def record_as_img(x, save_dir):
    """Plot the first 100 rows of ``x`` and save them as an image.

    Values are mapped with ``(x + 1) / 2`` before plotting and saving.

    Args:
        x: 2-D data to render.
        save_dir: Destination passed to ``utils.save_image``.

    NOTE(review): a new matplotlib figure is created on every call and is
    not closed here.
    """
    examples = (x[:100, :] + 1) / 2.
    axes = plt.subplots(1)[1]
    axes.axis('off')
    axes.imshow(examples)
    utils.save_image(examples, save_dir)
    return None


def tensorboard_img_writer(x, writer, name):
    """Send the first 100 rows of ``x`` to ``writer.add_image``.

    Values are mapped with ``(x + 1) / 2`` before being handed over.

    Args:
        x: 2-D data to log.
        writer: Object exposing ``add_image(tag, image)``.
        name: Tag under which the image is logged.
    """
    head = x[:100, :]
    writer.add_image(name, (head + 1) / 2.)


class avg_record(object):
    """Computes and stores the average and current value
       Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
    """

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.