-
Notifications
You must be signed in to change notification settings - Fork 3
/
utils.py
55 lines (43 loc) · 2.27 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
import time
import requests
import tarfile
import numpy as np
import argparse
import torch
################################
### LOADING THE CORA DATASET ###
################################
def load_cora(path='./cora', device='cpu'):
"""
Loads the Cora dataset. The dataset is downloaded from https://linqs-data.soe.ucsc.edu/public/lbc/cora.tgz.
"""
# Set the paths to the data files
content_path = os.path.join(path, 'cora.content')
cites_path = os.path.join(path, 'cora.cites')
# Load data from files
content_tensor = np.genfromtxt(content_path, dtype=np.dtype(str))
cites_tensor = np.genfromtxt(cites_path, dtype=np.int32)
# Process features
features = torch.FloatTensor(content_tensor[:, 1:-1].astype(np.int32)) # Extract feature values
scale_vector = torch.sum(features, dim=1) # Compute sum of features for each node
scale_vector = 1 / scale_vector # Compute reciprocal of the sums
scale_vector[scale_vector == float('inf')] = 0 # Handle division by zero cases
scale_vector = torch.diag(scale_vector).to_sparse() # Convert the scale vector to a sparse diagonal matrix
features = scale_vector @ features # Scale the features using the scale vector
# Process labels
classes, labels = np.unique(content_tensor[:, -1], return_inverse=True) # Extract unique classes and map labels to indices
labels = torch.LongTensor(labels) # Convert labels to a tensor
# Process adjacency matrix
idx = content_tensor[:, 0].astype(np.int32) # Extract node indices
idx_map = {id: pos for pos, id in enumerate(idx)} # Create a dictionary to map indices to positions
# Map node indices to positions in the adjacency matrix
edges = np.array(
list(map(lambda edge: [idx_map[edge[0]], idx_map[edge[1]]],
cites_tensor)), dtype=np.int32)
V = len(idx) # Number of nodes
E = edges.shape[0] # Number of edges
adj_mat = torch.sparse_coo_tensor(edges.T, torch.ones(E), (V, V), dtype=torch.int64) # Create the initial adjacency matrix as a sparse tensor
adj_mat = torch.eye(V) + adj_mat # Add self-loops to the adjacency matrix
# return features.to_sparse().to(device), labels.to(device), adj_mat.to_sparse().to(device)
return features.to(device), labels.to(device), adj_mat.to(device)