input_data.py
import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
import scipy.io as io


def parse_index_file(filename):
    """Parse an index file containing one integer node id per line."""
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index


def load_protein():
    """Load the Homo_sapiens protein network and group labels from the .mat file."""
    n = io.loadmat("data/Homo_sapiens.mat")
    return n['network'], n['group']


def load_enzyme():
    """Load the ENZYMES_g296 graph (125 nodes; node ids in the edge file start at 2)."""
    adj = sp.lil_matrix((125, 125))
    features = sp.lil_matrix((125, 1))
    for line in open("data/ENZYMES_g296.edges"):
        vals = line.split()
        x = int(vals[0]) - 2
        y = int(vals[1]) - 2
        adj[y, x] = adj[x, y] = 1
    return adj, features


def load_florida():
    """Load the weighted eco-florida graph (128 nodes, 1-based ids, edge weight in column 3)."""
    adj = sp.lil_matrix((128, 128))
    features = sp.lil_matrix((128, 1))
    for line in open("data/eco-florida.edges"):
        vals = line.split()
        x = int(vals[0]) - 1
        y = int(vals[1]) - 1
        val = float(vals[2])
        adj[y, x] = adj[x, y] = val
    return adj, features


def load_brain():
    """Load the fly (Drosophila medulla) brain network; repeated edges accumulate into integer weights."""
    adj = sp.lil_matrix((1780, 1780))
    features = sp.lil_matrix((1780, 1))
    for line in open("data/bn-fly-drosophila_medulla_1.edges"):
        vals = line.split()
        x = int(vals[0]) - 1
        y = int(vals[1]) - 1
        adj[y, x] = adj[x, y] = adj[x, y] + 1
    return adj, features


def load_data(dataset):
    """Return (adj, features) for one of the custom graphs or a Planetoid-style citation dataset."""
    if dataset == 'florida':
        return load_florida()
    elif dataset == 'brain':
        return load_brain()
    elif dataset == 'enzyme':
        return load_enzyme()
    elif dataset == 'protein':
        return load_protein()

    # load the data: x, tx, allx, graph
    names = ['x', 'tx', 'allx', 'graph']
    objects = []
    for i in range(len(names)):
        # the pickled objects are binary, so the files must be opened in 'rb' mode
        objects.append(pkl.load(open("data/ind.{}.{}".format(dataset, names[i]), 'rb')))
    x, tx, allx, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended

    # if dataset == 'cora':
    #     names = ['y', 'ty', 'ally']
    #     objects = []
    #     for i in range(len(names)):
    #         objects.append(pkl.load(open("data/ind.{}.{}".format(dataset, names[i]), 'rb')))
    #     y, ty, ally = tuple(objects)
    #     labels = np.vstack((ally, ty))
    #     labels[test_idx_reorder, :] = labels[test_idx_range, :]
    #     np.save('labels', labels)

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    return adj, features


def load_data_syn(type_):
    """Load the synthetic Watts-Strogatz graphs produced by the graph generator scripts."""
    adj = np.load('./graph generator/WS_graph.npy')
    node = np.load('./graph generator/WS_graph_node.npy')
    if type_ == 'train':
        # shuffle graphs and node features with the same random permutation
        index = [i for i in range(len(adj))]
        np.random.shuffle(index)
        adj = adj[index]
        node = node[index]
    # sanity check: every 20-node adjacency matrix should be symmetric
    for n in range(len(adj)):
        for i in range(20):
            for j in range(20):
                if adj[n, i, j] != adj[n, j, i]:
                    print('is not symmetric')
    return np.array(adj), node.reshape(adj.shape[0], adj.shape[1], -1, 1)
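

# A minimal usage sketch: it exercises the real-graph and synthetic loaders above,
# assuming the files under data/ and ./graph generator/ referenced in this module
# are present. The dataset name 'florida' mirrors one branch of load_data(); the
# printed shapes are purely illustrative.
if __name__ == '__main__':
    adj, features = load_data('florida')          # weighted 128-node graph as sparse matrices
    print('florida adjacency:', adj.shape, 'features:', features.shape)

    syn_adj, syn_nodes = load_data_syn('train')   # shuffled synthetic WS graphs and node features
    print('synthetic adjacency batch:', syn_adj.shape, 'node features:', syn_nodes.shape)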