-
Notifications
You must be signed in to change notification settings - Fork 1
/
graph_core.py
211 lines (175 loc) · 7.52 KB
/
graph_core.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
"""
Get the "topological core" of this graph.
The "topological core" is an idea I came up with for isolating the
"topologically interesting" parts of a graph. We get to it by iteratively
removing leaf nodes and collapsing linear paths:
* For leaf nodes (nodes of degree 1): We delete the node and it's one edge.
* For path nodes (nodes of degree 2): We "lift" the node (delete it and
connect its former neighbors).
* All self-edges or duplicate edges produced this way are omitted.
* The process is repeated until all nodes are degree >= 3 (or, vacuously,
until there are no nodes left).
This shrinks the graph down to a tight core which is interconnected in
non-trivial ways.
Every node from the original graph can be associated with either a node or
edge in the core graph.
* If node A is associated with node B in the core graph, that means that
removing node B from the original graph would separate A from all other
core nodes. In other words, node A is connected into the core only through
gateway node B.
* If node C is associated with edge DE in the core graph, that means that
removing nodes D & E from the original graph would separate C from the
other core nodes. In ohter words, node C is connected into the core
through both gateway nodes D & E.
* Any node which is connected to the core through >2 gateway nodes will be
in the core itself.
"""
import argparse
import csv
from pathlib import Path
import time
import networkx as nx
import graph_tools
import utils
Node = str
def CollectRay(graph : nx.Graph, node : Node, previous : Node | None = None
) -> tuple[Node | None, list[Node]]:
path : list[Node] = []
# Follow path until we reach a fork (or dead end or loop).
while not previous or graph.degree[node] == 2:
if node in path:
# We found a loop!
return None, path
path.append(node)
(neighbor,) = set(graph.adj[node].keys()) - set([previous])
previous = node
node = neighbor
return node, path
def CollectPath(graph : nx.Graph, node : Node
) -> tuple[tuple[Node | None, Node | None], list[Node]]:
"""Follow a path from node to both ends. Return the pair of ends and
list of nodes in the path."""
assert graph.degree[node] == 2
neigh_a, neigh_b = graph.adj[node].keys()
end_a, path_a = CollectRay(graph, neigh_a, previous=node)
end_b, path_b = CollectRay(graph, neigh_b, previous=node)
return (end_a, end_b), (list(reversed(path_a)) + [node] + path_b)
def ContractGraph(graph : nx.Graph, rep_nodes : dict[Node, set[Node]]) -> bool:
# Note: We may delete more nodes than to_delete.
to_delete = set()
for node in graph.nodes:
if graph.degree[node] <= 2:
to_delete.add(node)
for node in to_delete:
if node in graph.nodes:
if graph.degree[node] == 0:
# Strip all isolated nodes directly.
graph.remove_node(node)
del rep_nodes[node]
elif graph.degree[node] == 1:
end, path = CollectRay(graph, node)
for n in path:
if end:
rep_nodes[end].update(rep_nodes[n])
del rep_nodes[n]
graph.remove_nodes_from(path)
else:
assert graph.degree[node] == 2, (node, graph.degree[node])
# Lift all path nodes. Get full linear path and two ends.
# Note: path may not be a subset of to_delete since the degrees of
# some nodes may have changed since we collected to_delete.
(end_a, end_b), path = CollectPath(graph, node)
dist = nx.path_weight(graph, [end_a] + path + [end_b], "weight")
for n in path:
for end in (end_a, end_b):
if end is not None:
rep_nodes[end].update(rep_nodes[n])
del rep_nodes[n]
# Directly connect the two ends together
graph.add_edge(end_a, end_b, weight=dist)
# Strip all degree 2 nodes in path.
graph.remove_nodes_from(path)
# Return True if any nodes were deleted.
# Note: # nodes deleted is >= len(to_delete)
return len(to_delete) > 0
def FindCore(graph : nx.Graph) -> tuple[nx.MultiGraph, dict[Node, set[Node]]]:
"""Iteratively contract the graph until we reach the core."""
# Convert to weighted multigraph.
graph = nx.MultiGraph(graph)
nx.set_edge_attributes(graph, values = 1, name = 'weight')
# Map: core nodes -> nodes that collapse into this core node
rep_nodes = {node: set([node]) for node in graph.nodes}
while ContractGraph(graph, rep_nodes):
pass
return graph, rep_nodes
def RemoveRays(graph : nx.Graph) -> nx.Graph:
"""Remove all rays from graph."""
points = set()
for node in graph.nodes:
if graph.degree[node] == 1:
points.add(node)
for node in points:
end, path = CollectRay(graph, node)
graph.remove_nodes_from(path)
return graph
def degree_distr(graph : nx.Graph, max_degree : int) -> list[int]:
degree_counts = [0] * (max_degree + 1)
for node in graph.nodes:
degree = min(graph.degree[node], max_degree)
degree_counts[degree] += 1
return degree_counts
def degree_distr_str(graph : nx.Graph, max_degree : int = 6) -> str:
degree_counts = degree_distr(graph, max_degree)
count_strs = [f"{degree}:{degree_counts[degree]:_}"
for degree in range(max_degree)]
count_str = " ".join(count_strs)
return f" Degree dist: {count_str} {max_degree}+:{degree_counts[max_degree]:_}"
def num_dup_edges(graph : nx.MultiGraph) -> int:
"""Count number of duplicate edges (edges beyond the first between any given
pair of nodes.)"""
# Count total number of non-duplicate edges (i.e. unique pairs of nodes
# connected by at least one edge).
num_unique_edges = len(frozenset(
frozenset((u, v)) for (u, v, id) in graph.edges))
return len(graph.edges) - num_unique_edges
def main():
parser = argparse.ArgumentParser()
parser.add_argument("graph_in", type=Path)
args = parser.parse_args()
graph_dir = args.graph_in.parent
utils.log("Loading graph")
graph = graph_tools.load_graph(args.graph_in)
utils.log(f"Loaded graph: {len(graph.nodes):_} Nodes / {len(graph.edges):_} Edges")
utils.log(degree_distr_str(graph))
graph = graph_tools.largest_component(graph)
utils.log(f"Main component: {len(graph.nodes):_} Nodes / {len(graph.edges):_} Edges")
utils.log(degree_distr_str(graph))
basename = graph_dir / "connect"
filename = graph_tools.write_graph(graph, basename)
utils.log(f" Saved main component to {str(filename)}")
graph = graph_tools.largest_bicomponent(graph)
utils.log(f"Main bicomponent: {len(graph.nodes):_} Nodes / {len(graph.edges):_} Edges")
utils.log(degree_distr_str(graph))
basename = graph_dir / "biconnect"
filename = graph_tools.write_graph(graph, basename)
utils.log(f" Saved main bicomponent to {str(filename)}")
# Map: core nodes -> nodes that collapse into this core node
graph, rep_nodes = FindCore(graph)
utils.log(f"Topological Core: {len(graph.nodes):_} Nodes / {len(graph.edges):_} Edges / {num_dup_edges(graph):_} Duplicate edges / {nx.number_of_selfloops(graph):_} Selfloops")
utils.log(degree_distr_str(graph))
basename = graph_dir / "topo"
filename = graph_tools.write_graph(graph, basename)
utils.log(f" Saved topo to {str(filename)}")
filename = graph_dir / "topo.collapse.csv"
with open(filename, "w") as f:
csv_out = csv.DictWriter(f, ["core_node", "sub_node"])
csv_out.writeheader()
for core_node in rep_nodes:
for sub_node in rep_nodes[core_node]:
csv_out.writerow({
"core_node": core_node,
"sub_node": sub_node,
})
utils.log(f" Saved node collapse info to {str(filename)}")
if __name__ == "__main__":
main()