# utils_NoGE.py
import numpy as np
import scipy.sparse as sp
import torch
from collections import Counter
from scipy import sparse

# Fix the random seeds for reproducibility.
torch.manual_seed(1337)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(1337)
np.random.seed(1337)

def normalize_sparse(mx):
    """Row-normalize a scipy sparse matrix so each nonzero row sums to 1."""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.  # all-zero rows get 0 instead of inf
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx
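
# Worked example (a sketch, not part of the original module): for
#     A = [[2., 2.],
#          [0., 3.]]
# the row sums are [4., 3.], so D^{-1} = diag(1/4, 1/3) and
#     normalize_sparse(A) = [[0.5, 0.5],
#                            [0.0, 1.0]].
# An all-zero row would produce an infinite inverse row sum, which the
# isinf guard above resets to 0, leaving that row all zeros.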

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)
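
# Conversion sketch (hypothetical 2x2 input, not from the original): the
# CSR matrix [[0., 1.5], [2.5, 0.]] yields a COO tensor with
# indices [[0, 1], [1, 0]], values [1.5, 2.5], and size (2, 2).
# Note that torch.sparse.FloatTensor is the legacy constructor; recent
# PyTorch releases build the same tensor via torch.sparse_coo_tensor(...).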

# Build the new weighted adjacency matrix from the triples in `data`.
# Each triple is a mutable [head, relation, tail] list; entity ids are
# shifted up by num_relations so that relations keep ids 0..num_relations-1
# and entities occupy the ids after them.
def compute_weighted_adj_matrix(data, num_relations):
    # Count skipgram co-occurrences within each triple. With a window of 2
    # on both sides and triples of length 3, every ordered pair of tokens
    # inside a triple is counted.
    back_window = 2
    front_window = 2
    skipgram_counts = Counter()
    for tokens in data:
        tokens[0] += num_relations  # shift head entity id in place
        tokens[2] += num_relations  # shift tail entity id in place
        for ii_word, word in enumerate(tokens):
            ii_context_min = max(0, ii_word - back_window)
            ii_context_max = min(len(tokens) - 1, ii_word + front_window)
            ii_contexts = [
                ii for ii in range(ii_context_min, ii_context_max + 1)
                if ii != ii_word]
            for ii_context in ii_contexts:
                skipgram = (tokens[ii_word], tokens[ii_context])
                skipgram_counts[skipgram] += 1
    # Node-node co-occurrence count matrix.
    row_indxs = []
    col_indxs = []
    dat_values = []
    for (tok1, tok2), sg_count in skipgram_counts.items():
        row_indxs.append(tok1)
        col_indxs.append(tok2)
        dat_values.append(sg_count)
    wwcnt_mat = sparse.csr_matrix((dat_values, (row_indxs, col_indxs)))
    num_skipgrams = wwcnt_mat.sum()
    assert sum(skipgram_counts.values()) == num_skipgrams
    # For creating the weighted sparse matrix.
    row_indxs = []
    col_indxs = []
    weighted_edges = []
    # Reusable quantity: total co-occurrence count per node.
    sum_over_contexts = np.array(wwcnt_mat.sum(axis=1)).flatten()
    # Compute the weight of each edge.
    for (tok_word, tok_context), sg_count in skipgram_counts.items():
        nwc = sg_count
        Pwc = nwc / num_skipgrams  # joint probability P(word, context)
        nw = sum_over_contexts[tok_word]
        Pw = nw / num_skipgrams   # marginal probability P(word)
        edge_val = Pwc / Pw  # conditional P(context | word) for entity-entity edges
        if tok_word < num_relations or tok_context < num_relations:
            edge_val = Pwc  # joint probability for relation-entity edges
        row_indxs.append(tok_word)
        col_indxs.append(tok_context)
        weighted_edges.append(edge_val)
    edge_mat = sparse.csr_matrix((weighted_edges, (row_indxs, col_indxs)))
    # Add self-loops, row-normalize, and convert for PyTorch.
    adj = edge_mat + sparse.eye(edge_mat.shape[0], format="csr")
    adj = normalize_sparse(adj)
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    return adj
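
# A minimal smoke test (a sketch with made-up toy triples, not part of the
# original NoGE pipeline): two relations (ids 0-1) over four entities
# (ids 0-3), each triple given as a mutable [head, relation, tail] list
# because compute_weighted_adj_matrix shifts the entity ids in place.
if __name__ == "__main__":
    toy_triples = [
        [0, 0, 1],
        [1, 1, 2],
        [2, 0, 3],
    ]
    toy_adj = compute_weighted_adj_matrix(toy_triples, num_relations=2)
    # The adjacency covers 2 relations + 4 entities = 6 nodes.
    print(toy_adj.shape)  # torch.Size([6, 6])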