From e690e36a4761f3d2f8cb1d869489b3172c6c999a Mon Sep 17 00:00:00 2001
From: kyechou
Date: Sun, 28 Jul 2024 11:10:36 -0500
Subject: [PATCH] cleanup

---
 .../15-real-networks/parse-network-dataset.py | 106 +++++++++--------
 examples/logparser.py                         |  46 ++++---
 src/config.py                                 |   4 +-
 src/dataparse.py                              | 112 +++++++++---------
 4 files changed, 143 insertions(+), 125 deletions(-)

diff --git a/examples/15-real-networks/parse-network-dataset.py b/examples/15-real-networks/parse-network-dataset.py
index 7c251535..e8ec4571 100644
--- a/examples/15-real-networks/parse-network-dataset.py
+++ b/examples/15-real-networks/parse-network-dataset.py
@@ -1,30 +1,26 @@
 #!/usr/bin/env python3
+import argparse
 import logging
 import os
 import re
-import sys
-import argparse
-
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../src'))
-from dataparse import *
 
 
 def parse_snap(in_dir, out_dir):
     prefix = os.path.basename(in_dir)
     for entry in os.scandir(in_dir):
-        if not entry.name.endswith('.txt'):
+        if not entry.name.endswith(".txt"):
             continue
 
         nodes = set()
         edges = set()
 
         logging.info("Processing %s", entry.path)
-        with open(entry.path, 'r') as f:
+        with open(entry.path, "r") as f:
             for line in f:
                 tokens = line.split()
-                if tokens[0] == '#':
+                if tokens[0] == "#":
                     continue
                 if tokens[0] == tokens[1]:  # self-to-self link
                     continue
@@ -34,11 +30,11 @@ def parse_snap(in_dir, out_dir):
                 edges.add((tokens[0], tokens[1]))
 
         out_fn = os.path.join(
-            out_dir, '{}.{}-nodes.{}-edges.txt'.format(prefix, len(nodes),
-                                                       len(edges)))
-        with open(out_fn, 'w') as f:
+            out_dir, "{}.{}-nodes.{}-edges.txt".format(prefix, len(nodes), len(edges))
+        )
+        with open(out_fn, "w") as f:
             for edge in edges:
-                f.write(edge[0] + ' ' + edge[1] + '\n')
+                f.write(edge[0] + " " + edge[1] + "\n")
 
 
 def parse_rocketfuel_bb(in_dir, out_dir):
     for entry in os.scandir(in_dir):
@@ -47,12 +43,12 @@ def parse_rocketfuel_bb(in_dir, out_dir):
             continue
 
         asn = int(entry.name)
-        weight_fn = os.path.join(entry.path, 'weights.intra')
+        weight_fn = os.path.join(entry.path, "weights.intra")
         nodes = dict()
        edges = set()
 
         logging.info("Processing %s", weight_fn)
-        with open(weight_fn, 'r') as f:
+        with open(weight_fn, "r") as f:
             for line in f:
                 tokens = line.split()
                 if tokens[0] == tokens[1]:  # self-to-self link
@@ -64,17 +60,20 @@ def parse_rocketfuel_bb(in_dir, out_dir):
                 edges.add((tokens[0], tokens[1]))
 
         out_fn = os.path.join(
-            out_dir, 'rocketfuel-bb-AS-{}.{}-nodes.{}-edges.txt'.format(
-                asn, len(nodes), len(edges)))
-        with open(out_fn, 'w') as f:
+            out_dir,
+            "rocketfuel-bb-AS-{}.{}-nodes.{}-edges.txt".format(
+                asn, len(nodes), len(edges)
+            ),
+        )
+        with open(out_fn, "w") as f:
             for edge in edges:
-                f.write(nodes[edge[0]] + ' ' + nodes[edge[1]] + '\n')
+                f.write(nodes[edge[0]] + " " + nodes[edge[1]] + "\n")
 
 
 def parse_rocketfuel_cch(in_dir, out_dir):
@@ -82,25 +81,25 @@ def parse_rocketfuel_cch(in_dir, out_dir):
-        m = re.search('^(\\d+)\\.cch$', entry.name)
-        if m == None:
+        m = re.search("^(\\d+)\\.cch$", entry.name)
+        if m is None:
             continue
 
         asn = int(m.group(1))
         nodes = set()
         edges = set()
 
         logging.info("Processing %s", entry.path)
-        with open(entry.path, 'r') as f:
+        with open(entry.path, "r") as f:
             for line in f:
                 tokens = line.split()
-                if tokens[0].startswith('-'):  # external nodes
+                if tokens[0].startswith("-"):  # external nodes
                     continue
-                if tokens[-1] not in ['r0', 'r1']:  # only include r0, r1 links
+                if tokens[-1] not in ["r0", "r1"]:  # only include r0, r1 links
                     continue
 
                 uid = tokens[0]
                 i = 0
                 for token in tokens:
                     i += 1
-                    if token == '->':
+                    if token == "->":
                         break
 
-                while (tokens[i].startswith('<') and tokens[i].endswith('>')):
+                while tokens[i].startswith("<") and tokens[i].endswith(">"):
                     nuid = tokens[i][1:-1]
                     i += 1
 
-                    if (uid == nuid):  # self-to-self link
+                    if uid == nuid:  # self-to-self link
                         continue
 
                     nodes.add(uid)
                     nodes.add(nuid)
                     if (nuid, uid) not in edges:
                         edges.add((uid, nuid))
 
@@ -108,50 +107,55 @@ def parse_rocketfuel_cch(in_dir, out_dir):
 
         out_fn = os.path.join(
-            out_dir, 'rocketfuel-cch-AS-{}.{}-nodes.{}-edges.txt'.format(
-                asn, len(nodes), len(edges)))
-        with open(out_fn, 'w') as f:
+            out_dir,
+            "rocketfuel-cch-AS-{}.{}-nodes.{}-edges.txt".format(
+                asn, len(nodes), len(edges)
+            ),
+        )
+        with open(out_fn, "w") as f:
             for edge in edges:
-                f.write(edge[0] + ' ' + edge[1] + '\n')
+                f.write(edge[0] + " " + edge[1] + "\n")
 
 
 def main():
-    parser = argparse.ArgumentParser(description='Parse network datasets')
-    parser.add_argument('-i',
-                        '--in-dir',
-                        help='Input dataset directory',
-                        type=str,
-                        action='store',
-                        required=True)
+    parser = argparse.ArgumentParser(description="Parse network datasets")
+    parser.add_argument(
+        "-i",
+        "--in-dir",
+        help="Input dataset directory",
+        type=str,
+        action="store",
+        required=True,
+    )
     parser.add_argument(
-        '-t',
-        '--type',
-        help='Dataset type',
+        "-t",
+        "--type",
+        help="Dataset type",
         type=str,
-        action='store',
-        choices=['stanford', 'rocketfuel-bb', 'rocketfuel-cch'],
-        required=True)
+        action="store",
+        choices=["stanford", "rocketfuel-bb", "rocketfuel-cch"],
+        required=True,
+    )
     arg = parser.parse_args()
 
-    logging.basicConfig(level=logging.INFO,
-                        format='[%(levelname)s] %(message)s')
+    logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
 
     in_dir = os.path.abspath(arg.in_dir)
     if not os.path.isdir(in_dir):
         raise Exception("'" + in_dir + "' is not a directory")
     base_dir = os.path.abspath(os.path.dirname(__file__))
-    out_dir = os.path.join(base_dir, 'data')
+    out_dir = os.path.join(base_dir, "data")
     os.makedirs(out_dir, exist_ok=True)
 
-    if arg.type == 'stanford':
+    if arg.type == "stanford":
         parse_snap(in_dir, out_dir)
-    elif arg.type == 'rocketfuel-bb':
+    elif arg.type == "rocketfuel-bb":
         parse_rocketfuel_bb(in_dir, out_dir)
-    elif arg.type == 'rocketfuel-cch':
+    elif arg.type == "rocketfuel-cch":
         parse_rocketfuel_cch(in_dir, out_dir)
     else:
-        raise Exception('Unknown dataset type: ' + arg.type)
+        raise Exception("Unknown dataset type: " + arg.type)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/examples/logparser.py b/examples/logparser.py
index 53f362c8..3716d542 100755
--- a/examples/logparser.py
+++ b/examples/logparser.py
@@ -23,38 +23,47 @@ def parse_main_log(output_dir, settings):
     with open(mainlogFn) as mainlog:
         inv_id = 0
         for line in mainlog:
-            if re.search("Loaded (\d+) nodes", line):
-                m = re.search("Loaded (\d+) nodes", line)
+            if re.search(r"Loaded (\d+) nodes", line):
+                m = re.search(r"Loaded (\d+) nodes", line)
+                assert m is not None
                 settings["num_nodes"] = int(m.group(1))
             elif " links" in line:
-                m = re.search("Loaded (\d+) links", line)
+                m = re.search(r"Loaded (\d+) links", line)
+                assert m is not None
                 settings["num_links"] = int(m.group(1))
             elif "openflow updates" in line:
-                m = re.search("Loaded (\d+) openflow updates", line)
+                m = re.search(r"Loaded (\d+) openflow updates", line)
+                assert m is not None
                 settings["num_updates"] = int(m.group(1))
             elif "Initial ECs:" in line:
-                m = re.search("Initial ECs: (\d+)", line)
+                m = re.search(r"Initial ECs: (\d+)", line)
+                assert m is not None
                 settings["total_conn"] = int(m.group(1))
             elif "Initial ports:" in line:
-                m = re.search("Initial ports: (\d+)", line)
+                m = re.search(r"Initial ports: (\d+)", line)
+                assert m is not None
                 settings["total_conn"] *= int(m.group(1))
             elif "Verifying invariant " in line:
-                m = re.search("(\d+)\.?\s*Verifying invariant ", line)
+                m = re.search(r"(\d+)\.?\s*Verifying invariant ", line)
+                assert m is not None
                 inv_id = int(m.group(1))
                 settings["invariant"].append(inv_id)
                 settings["violated"].append(False)
                 # assert (len(settings['invariant']) == inv_id)
             elif "Connection ECs:" in line:
-                m = re.search("Connection ECs: (\d+)", line)
+                m = re.search(r"Connection ECs: (\d+)", line)
+                assert m is not None
                 settings["independent_cec"].append(int(m.group(1)))
                 # assert (len(settings['independent_cec']) == inv_id)
             elif "Invariant violated" in line:
                 settings["violated"][inv_id - 1] = True
             elif "Time:" in line:
-                m = re.search("Time: (\d+) usec", line)
+                m = re.search(r"Time: (\d+) usec", line)
+                assert m is not None
                 settings["total_time"] = int(m.group(1))
             elif "Peak memory:" in line:
-                m = re.search("Peak memory: (\d+) KiB", line)
+                m = re.search(r"Peak memory: (\d+) KiB", line)
+                assert m is not None
                 settings["total_mem"] = int(m.group(1))
 
 
@@ -68,8 +77,9 @@ def parse_02_settings(output_dir):
     }
     dirname = os.path.basename(output_dir)
     m = re.search(
-        "output\.(\d+)-apps\.(\d+)-hosts\.(\d+)-procs\.([a-z]+)(\.fault)?", dirname
+        r"output\.(\d+)-apps\.(\d+)-hosts\.(\d+)-procs\.([a-z]+)(\.fault)?", dirname
     )
+    assert m is not None
     settings["apps"] = int(m.group(1))
     settings["hosts"] = int(m.group(2))
     settings["procs"] = int(m.group(3))
@@ -89,9 +99,10 @@ def parse_03_settings(output_dir):
     }
     dirname = os.path.basename(output_dir)
     m = re.search(
-        "output\.(\d+)-lbs\.(\d+)-servers\.algo-([a-z]+)\.(\d+)-procs\.([a-z]+)",
+        r"output\.(\d+)-lbs\.(\d+)-servers\.algo-([a-z]+)\.(\d+)-procs\.([a-z]+)",
         dirname,
     )
+    assert m is not None
     settings["lbs"] = int(m.group(1))
     settings["servers"] = int(m.group(2))
     settings["algorithm"] = m.group(3)
@@ -110,8 +121,9 @@ def parse_06_settings(output_dir):
     }
     dirname = os.path.basename(output_dir)
     m = re.search(
-        "output\.(\d+)-tenants\.(\d+)-updates\.(\d+)-procs\.([a-z]+)", dirname
+        r"output\.(\d+)-tenants\.(\d+)-updates\.(\d+)-procs\.([a-z]+)", dirname
     )
+    assert m is not None
     settings["tenants"] = int(m.group(1))
     settings["updates"] = int(m.group(2))
     settings["procs"] = int(m.group(3))
@@ -130,9 +142,10 @@ def parse_15_settings(output_dir):
     }
     dirname = os.path.basename(output_dir)
     m = re.search(
-        "output\.([^\.]+)\.\d+-nodes\.\d+-edges\.(\d+)-emulated\.(\d+)-invariants\.(\d+)-procs\.([a-z]+)",
+        r"output\.([^\.]+)\.\d+-nodes\.\d+-edges\.(\d+)-emulated\.(\d+)-invariants\.(\d+)-procs\.([a-z]+)",
         dirname,
     )
+    assert m is not None
     settings["network"] = m.group(1)
     settings["emulated_pct"] = int(m.group(2))
     settings["invariants"] = int(m.group(3))
@@ -150,7 +163,10 @@ def parse_18_settings(output_dir):
         "drop_method": "",
     }
     dirname = os.path.basename(output_dir)
-    m = re.search("output\.(\d+)-ary\.(\d+)-update-pct\.(\d+)-procs\.([a-z]+)", dirname)
+    m = re.search(
+        r"output\.(\d+)-ary\.(\d+)-update-pct\.(\d+)-procs\.([a-z]+)", dirname
+    )
+    assert m is not None
     settings["arity"] = int(m.group(1))
     settings["update_pct"] = int(m.group(2))
     settings["procs"] = int(m.group(3))
diff --git a/src/config.py b/src/config.py
index 2cbda598..5327126e 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python3
 
-from typing import Any
-from typing import Optional
+from typing import Any, Optional
+
 import toml
diff --git a/src/dataparse.py b/src/dataparse.py
index 981f79bc..c8287b36 100644
--- a/src/dataparse.py
+++ b/src/dataparse.py
@@ -1,9 +1,10 @@
-import networkx as nx
-import matplotlib.pyplot as plt
 import ipaddress
 from collections import deque
 from pathlib import Path
 
+import matplotlib.pyplot as plt
+import networkx as nx
+
 
 class NetSynthesizer:
     def __init__(self, linkfile):
@@ -118,14 +119,13 @@ def get_dst_interface(self, src, dst):
     def leaves(self):
         return bfs_leaves(self.G, list(self.G)[0])
-
+
     def get_firewall(self, n):
-        ret = []
         leaves = self.leaves()
         if n not in leaves:
             return -1
         return list(self.G[n])[0]
-
+
     def get_itf_to_leaf(self, n):
         leaves = self.leaves()
         for i in self.node_to_interfaces[n]:
@@ -133,60 +133,58 @@ def get_itf_to_leaf(self, n):
             dn = self.interface_to_node[di]
             if dn in leaves:
                 return i[0]
-
-
-
-
-class FileParser:
-    """
-    nodefile : the nodes filename. The file must be space-separated, each line describing a single node.
-              node name at the first column, followed by the interfaces
-
-    rulefile : the rules filename. The file must be space-separated, each line descriing a single rule.
-              node name at the first column, destination subnet in the second column, destination address at the third column.
-    """
-
-    def __init__(self, nodefile=None, rulefile=None, linkfile=None):
-        self.nodes = read_dsv(nodefile)
-        self.rules = read_dsv(rulefile)
-        self.ntoidict = dict()
-        self.itondict = dict()
-
-        for line in self.nodes:
-            self.ntoidict[line[0]] = []
-            for interface in line[1:]:
-                self.ntoidict[line[0]].append(interface)
-                self.itondict[interface] = line[0]
-
-    def visualize(self):
-        G = nx.Graph()
-        for n in self.nodes:
-            G.add_node(n[0])
-        for r in self.rules:
-            srcinterface = self.findMatchingInterface(r[0], r[1])
-            dstnode = self.itondict[r[2]]
-            G.add_edge(r[0], dstnode, headlabel=srcinterface, taillabel=r[2])
-
-        pos = nx.spring_layout(G)
-        nx.draw_networkx(G, pos)
-        head_labels = nx.get_edge_attributes(G, "headlabel")
-        tail_labels = nx.get_edge_attributes(G, "taillabel")
-
-        nx.draw_networkx_edge_labels(
-            G, pos, label_pos=0.25, edge_labels=head_labels, rotate=False
-        )
-        nx.draw_networkx_edge_labels(
-            G, pos, label_pos=0.75, edge_labels=tail_labels, rotate=False
-        )
-
-        plt.axis("off")
-        plt.savefig("topo.png")
-
-    def findMatchingInterface(self, node, subnet):
-        for interface in self.ntoidict[node]:
-            if ipaddress.ip_address(interface) in ipaddress.ip_network(subnet):
-                return interface
+# class FileParser:
+#     """
+#     nodefile : the nodes filename. The file must be space-separated, each line describing a single node.
+#               node name at the first column, followed by the interfaces
+#
+#     rulefile : the rules filename. The file must be space-separated, each line descriing a single rule.
+#               node name at the first column, destination subnet in the second column, destination address at the third column.
+#     """
+#
+#     def __init__(self, nodefile=None, rulefile=None, linkfile=None):
+#         self.nodes = read_dsv(nodefile)
+#         self.rules = read_dsv(rulefile)
+#         self.ntoidict = dict()
+#         self.itondict = dict()
+#
+#         for line in self.nodes:
+#             self.ntoidict[line[0]] = []
+#             for interface in line[1:]:
+#                 self.ntoidict[line[0]].append(interface)
+#                 self.itondict[interface] = line[0]
+#
+#     def visualize(self):
+#         G = nx.Graph()
+#         for n in self.nodes:
+#             G.add_node(n[0])
+#         for r in self.rules:
+#             srcinterface = self.findMatchingInterface(r[0], r[1])
+#             dstnode = self.itondict[r[2]]
+#             G.add_edge(r[0], dstnode, headlabel=srcinterface, taillabel=r[2])
+#
+#         pos = nx.spring_layout(G)
+#
+#         nx.draw_networkx(G, pos)
+#         head_labels = nx.get_edge_attributes(G, "headlabel")
+#         tail_labels = nx.get_edge_attributes(G, "taillabel")
+#
+#         nx.draw_networkx_edge_labels(
+#             G, pos, label_pos=0.25, edge_labels=head_labels, rotate=False
+#         )
+#         nx.draw_networkx_edge_labels(
+#             G, pos, label_pos=0.75, edge_labels=tail_labels, rotate=False
+#         )
+#
+#         plt.axis("off")
+#         plt.savefig("topo.png")
+#
+#     def findMatchingInterface(self, node, subnet):
+#         for interface in self.ntoidict[node]:
+#             if ipaddress.ip_address(interface) in ipaddress.ip_network(subnet):
+#                 return interface
 
 
 def BFSParent(G, s):