From e690e36a4761f3d2f8cb1d869489b3172c6c999a Mon Sep 17 00:00:00 2001
From: kyechou
Date: Sun, 28 Jul 2024 11:10:36 -0500
Subject: [PATCH] cleanup

---
 .../15-real-networks/parse-network-dataset.py | 106 +++++++++--------
 examples/logparser.py                         |  46 ++++---
 src/config.py                                 |   4 +-
 src/dataparse.py                              | 112 +++++++++---------
 4 files changed, 143 insertions(+), 125 deletions(-)

diff --git a/examples/15-real-networks/parse-network-dataset.py b/examples/15-real-networks/parse-network-dataset.py
index 7c251535..e8ec4571 100644
--- a/examples/15-real-networks/parse-network-dataset.py
+++ b/examples/15-real-networks/parse-network-dataset.py
@@ -1,30 +1,26 @@
 #!/usr/bin/env python3
+import argparse
 import logging
 import os
 import re
-import sys
-import argparse
-
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../src'))
-from dataparse import *
 
 
 def parse_snap(in_dir, out_dir):
     prefix = os.path.basename(in_dir)
     for entry in os.scandir(in_dir):
-        if not entry.name.endswith('.txt'):
+        if not entry.name.endswith(".txt"):
             continue
 
         nodes = set()
         edges = set()
 
         logging.info("Processing %s", entry.path)
-        with open(entry.path, 'r') as f:
+        with open(entry.path, "r") as f:
             for line in f:
                 tokens = line.split()
-                if tokens[0] == '#':
+                if tokens[0] == "#":
                     continue
                 if tokens[0] == tokens[1]:  # self-to-self link
                     continue
@@ -34,11 +30,11 @@ def parse_snap(in_dir, out_dir):
                 edges.add((tokens[0], tokens[1]))
 
         out_fn = os.path.join(
-            out_dir, '{}.{}-nodes.{}-edges.txt'.format(prefix, len(nodes),
-                                                       len(edges)))
-        with open(out_fn, 'w') as f:
+            out_dir, "{}.{}-nodes.{}-edges.txt".format(prefix, len(nodes), len(edges))
+        )
+        with open(out_fn, "w") as f:
             for edge in edges:
-                f.write(edge[0] + ' ' + edge[1] + '\n')
+                f.write(edge[0] + " " + edge[1] + "\n")
 
 
 def parse_rocketfuel_bb(in_dir, out_dir):
     for entry in os.scandir(in_dir):
@@ -47,12 +43,12 @@ def parse_rocketfuel_bb(in_dir, out_dir):
             continue
 
         asn = int(entry.name)
-        weight_fn = os.path.join(entry.path, 'weights.intra')
+        weight_fn = os.path.join(entry.path, "weights.intra")
         nodes = dict()
        edges = set()
 
         logging.info("Processing %s", weight_fn)
-        with open(weight_fn, 'r') as f:
+        with open(weight_fn, "r") as f:
             for line in f:
                 tokens = line.split()
                 if tokens[0] == tokens[1]:  # self-to-self link
@@ -64,17 +60,20 @@ def parse_rocketfuel_bb(in_dir, out_dir):
                 edges.add((tokens[0], tokens[1]))
 
         out_fn = os.path.join(
-            out_dir, 'rocketfuel-bb-AS-{}.{}-nodes.{}-edges.txt'.format(
-                asn, len(nodes), len(edges)))
-        with open(out_fn, 'w') as f:
+            out_dir,
+            "rocketfuel-bb-AS-{}.{}-nodes.{}-edges.txt".format(
+                asn, len(nodes), len(edges)
+            ),
+        )
+        with open(out_fn, "w") as f:
             for edge in edges:
-                f.write(nodes[edge[0]] + ' ' + nodes[edge[1]] + '\n')
+                f.write(nodes[edge[0]] + " " + nodes[edge[1]] + "\n")
 
 
 def parse_rocketfuel_cch(in_dir, out_dir):
@@ -82,25 +81,25 @@ def parse_rocketfuel_cch(in_dir, out_dir):
-        m = re.search('^(\\d+)\\.cch$', entry.name)
-        if m == None:
+        m = re.search("^(\\d+)\\.cch$", entry.name)
+        if m is None:
             continue
 
         asn = int(m.group(1))
         nodes = set()
         edges = set()
 
         logging.info("Processing %s", entry.path)
-        with open(entry.path, 'r') as f:
+        with open(entry.path, "r") as f:
             for line in f:
                 tokens = line.split()
-                if tokens[0].startswith('-'):  # external nodes
+                if tokens[0].startswith("-"):  # external nodes
                     continue
-                if tokens[-1] not in ['r0', 'r1']:  # only include r0, r1 links
+                if tokens[-1] not in ["r0", "r1"]:  # only include r0, r1 links
                     continue
 
                 uid = tokens[0]
                 i = 0
                 for token in tokens:
                     i += 1
-                    if token == '->':
+                    if token == "->":
                         break
 
-                while (tokens[i].startswith('<') and tokens[i].endswith('>')):
+                while tokens[i].startswith("<") and tokens[i].endswith(">"):
                     nuid = tokens[i][1:-1]
                     i += 1
 
-                    if (uid == nuid):  # self-to-self link
+                    if uid == nuid:  # self-to-self link
                         continue
 
                     nodes.add(uid)
                     nodes.add(nuid)
                     if (nuid, uid) not in edges:
                         edges.add((uid, nuid))
 
@@ -108,50 +107,55 @@ def parse_rocketfuel_cch(in_dir, out_dir):
 
         out_fn = os.path.join(
-            out_dir, 'rocketfuel-cch-AS-{}.{}-nodes.{}-edges.txt'.format(
-                asn, len(nodes), len(edges)))
-        with open(out_fn, 'w') as f:
+            out_dir,
+            "rocketfuel-cch-AS-{}.{}-nodes.{}-edges.txt".format(
+                asn, len(nodes), len(edges)
+            ),
+        )
+        with open(out_fn, "w") as f:
             for edge in edges:
-                f.write(edge[0] + ' ' + edge[1] + '\n')
+                f.write(edge[0] + " " + edge[1] + "\n")
 
 
 def main():
-    parser = argparse.ArgumentParser(description='Parse network datasets')
-    parser.add_argument('-i',
-                        '--in-dir',
-                        help='Input dataset directory',
-                        type=str,
-                        action='store',
-                        required=True)
+    parser = argparse.ArgumentParser(description="Parse network datasets")
+    parser.add_argument(
+        "-i",
+        "--in-dir",
+        help="Input dataset directory",
+        type=str,
+        action="store",
+        required=True,
+    )
     parser.add_argument(
-        '-t',
-        '--type',
-        help='Dataset type',
+        "-t",
+        "--type",
+        help="Dataset type",
         type=str,
-        action='store',
-        choices=['stanford', 'rocketfuel-bb', 'rocketfuel-cch'],
-        required=True)
+        action="store",
+        choices=["stanford", "rocketfuel-bb", "rocketfuel-cch"],
+        required=True,
+    )
     arg = parser.parse_args()
 
-    logging.basicConfig(level=logging.INFO,
-                        format='[%(levelname)s] %(message)s')
+    logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
 
     in_dir = os.path.abspath(arg.in_dir)
     if not os.path.isdir(in_dir):
         raise Exception("'" + in_dir + "' is not a directory")
     base_dir = os.path.abspath(os.path.dirname(__file__))
-    out_dir = os.path.join(base_dir, 'data')
+    out_dir = os.path.join(base_dir, "data")
     os.makedirs(out_dir, exist_ok=True)
 
-    if arg.type == 'stanford':
+    if arg.type == "stanford":
         parse_snap(in_dir, out_dir)
-    elif arg.type == 'rocketfuel-bb':
+    elif arg.type == "rocketfuel-bb":
         parse_rocketfuel_bb(in_dir, out_dir)
-    elif arg.type == 'rocketfuel-cch':
+    elif arg.type == "rocketfuel-cch":
         parse_rocketfuel_cch(in_dir, out_dir)
     else:
-        raise Exception('Unknown dataset type: ' + arg.type)
+        raise Exception("Unknown dataset type: " + arg.type)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/examples/logparser.py b/examples/logparser.py
index 53f362c8..3716d542 100755
--- a/examples/logparser.py
+++ b/examples/logparser.py
@@ -23,38 +23,47 @@ def parse_main_log(output_dir, settings):
     with open(mainlogFn) as mainlog:
         inv_id = 0
         for line in mainlog:
-            if re.search("Loaded (\d+) nodes", line):
-                m = re.search("Loaded (\d+) nodes", line)
+            if re.search(r"Loaded (\d+) nodes", line):
+                m = re.search(r"Loaded (\d+) nodes", line)
+                assert m is not None
                 settings["num_nodes"] = int(m.group(1))
             elif " links" in line:
-                m = re.search("Loaded (\d+) links", line)
+                m = re.search(r"Loaded (\d+) links", line)
+                assert m is not None
                 settings["num_links"] = int(m.group(1))
             elif "openflow updates" in line:
-                m = re.search("Loaded (\d+) openflow updates", line)
+                m = re.search(r"Loaded (\d+) openflow updates", line)
+                assert m is not None
                 settings["num_updates"] = int(m.group(1))
             elif "Initial ECs:" in line:
-                m = re.search("Initial ECs: (\d+)", line)
+                m = re.search(r"Initial ECs: (\d+)", line)
+                assert m is not None
                 settings["total_conn"] = int(m.group(1))
             elif "Initial ports:" in line:
-                m = re.search("Initial ports: (\d+)", line)
+                m = re.search(r"Initial ports: (\d+)", line)
+                assert m is not None
                 settings["total_conn"] *= int(m.group(1))
             elif "Verifying invariant " in line:
-                m = re.search("(\d+)\.?\s*Verifying invariant ", line)
+                m = re.search(r"(\d+)\.?\s*Verifying invariant ", line)
+                assert m is not None
                 inv_id = int(m.group(1))
                 settings["invariant"].append(inv_id)
                 settings["violated"].append(False)
                 # assert (len(settings['invariant']) == inv_id)
             elif "Connection ECs:" in line:
-                m = re.search("Connection ECs: (\d+)", line)
+                m = re.search(r"Connection ECs: (\d+)", line)
+                assert m is not None
                 settings["independent_cec"].append(int(m.group(1)))
                 # assert (len(settings['independent_cec']) == inv_id)
             elif "Invariant violated" in line:
                 settings["violated"][inv_id - 1] = True
             elif "Time:" in line:
-                m = re.search("Time: (\d+) usec", line)
+                m = re.search(r"Time: (\d+) usec", line)
+                assert m is not None
                 settings["total_time"] = int(m.group(1))
             elif "Peak memory:" in line:
-                m = re.search("Peak memory: (\d+) KiB", line)
+                m = re.search(r"Peak memory: (\d+) KiB", line)
+                assert m is not None
                 settings["total_mem"] = int(m.group(1))
 
 
@@ -68,8 +77,9 @@ def parse_02_settings(output_dir):
     }
     dirname = os.path.basename(output_dir)
     m = re.search(
-        "output\.(\d+)-apps\.(\d+)-hosts\.(\d+)-procs\.([a-z]+)(\.fault)?", dirname
+        r"output\.(\d+)-apps\.(\d+)-hosts\.(\d+)-procs\.([a-z]+)(\.fault)?", dirname
     )
+    assert m is not None
     settings["apps"] = int(m.group(1))
     settings["hosts"] = int(m.group(2))
     settings["procs"] = int(m.group(3))
@@ -89,9 +99,10 @@ def parse_03_settings(output_dir):
     }
     dirname = os.path.basename(output_dir)
     m = re.search(
-        "output\.(\d+)-lbs\.(\d+)-servers\.algo-([a-z]+)\.(\d+)-procs\.([a-z]+)",
+        r"output\.(\d+)-lbs\.(\d+)-servers\.algo-([a-z]+)\.(\d+)-procs\.([a-z]+)",
         dirname,
     )
+    assert m is not None
     settings["lbs"] = int(m.group(1))
     settings["servers"] = int(m.group(2))
     settings["algorithm"] = m.group(3)
@@ -110,8 +121,9 @@ def parse_06_settings(output_dir):
     }
     dirname = os.path.basename(output_dir)
     m = re.search(
-        "output\.(\d+)-tenants\.(\d+)-updates\.(\d+)-procs\.([a-z]+)", dirname
+        r"output\.(\d+)-tenants\.(\d+)-updates\.(\d+)-procs\.([a-z]+)", dirname
     )
+    assert m is not None
     settings["tenants"] = int(m.group(1))
     settings["updates"] = int(m.group(2))
     settings["procs"] = int(m.group(3))
@@ -130,9 +142,10 @@ def parse_15_settings(output_dir):
     }
     dirname = os.path.basename(output_dir)
     m = re.search(
-        "output\.([^\.]+)\.\d+-nodes\.\d+-edges\.(\d+)-emulated\.(\d+)-invariants\.(\d+)-procs\.([a-z]+)",
+        r"output\.([^\.]+)\.\d+-nodes\.\d+-edges\.(\d+)-emulated\.(\d+)-invariants\.(\d+)-procs\.([a-z]+)",
         dirname,
     )
+    assert m is not None
     settings["network"] = m.group(1)
     settings["emulated_pct"] = int(m.group(2))
     settings["invariants"] = int(m.group(3))
@@ -150,7 +163,10 @@ def parse_18_settings(output_dir):
         "drop_method": "",
     }
     dirname = os.path.basename(output_dir)
-    m = re.search("output\.(\d+)-ary\.(\d+)-update-pct\.(\d+)-procs\.([a-z]+)", dirname)
+    m = re.search(
+        r"output\.(\d+)-ary\.(\d+)-update-pct\.(\d+)-procs\.([a-z]+)", dirname
+    )
+    assert m is not None
     settings["arity"] = int(m.group(1))
     settings["update_pct"] = int(m.group(2))
     settings["procs"] = int(m.group(3))
diff --git a/src/config.py b/src/config.py
index 2cbda598..5327126e 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python3
 
-from typing import Any
-from typing import Optional
+from typing import Any, Optional
+
 import toml
diff --git a/src/dataparse.py b/src/dataparse.py
index 981f79bc..c8287b36 100644
--- a/src/dataparse.py
+++ b/src/dataparse.py
@@ -1,9 +1,10 @@
-import networkx as nx
-import matplotlib.pyplot as plt
 import ipaddress
 from collections import deque
 from pathlib import Path
 
+import matplotlib.pyplot as plt
+import networkx as nx
+
 
 class NetSynthesizer:
     def __init__(self, linkfile):
@@ -118,14 +119,13 @@ def get_dst_interface(self, src, dst):
     def leaves(self):
         return bfs_leaves(self.G, list(self.G)[0])
-
+
     def get_firewall(self, n):
-        ret = []
         leaves = self.leaves()
         if n not in leaves:
             return -1
         return list(self.G[n])[0]
-
+
     def get_itf_to_leaf(self, n):
         leaves = self.leaves()
         for i in self.node_to_interfaces[n]:
@@ -133,60 +133,58 @@ def get_itf_to_leaf(self, n):
             dn = self.interface_to_node[di]
             if dn in leaves:
                 return i[0]
-
-
-
-
-class FileParser:
-    """
-    nodefile : the nodes filename. The file must be space-separated, each line describing a single node.
-              node name at the first column, followed by the interfaces
-
-    rulefile : the rules filename. The file must be space-separated, each line descriing a single rule.
-              node name at the first column, destination subnet in the second column, destination address at the third column.
-    """
-
-    def __init__(self, nodefile=None, rulefile=None, linkfile=None):
-        self.nodes = read_dsv(nodefile)
-        self.rules = read_dsv(rulefile)
-        self.ntoidict = dict()
-        self.itondict = dict()
-
-        for line in self.nodes:
-            self.ntoidict[line[0]] = []
-            for interface in line[1:]:
-                self.ntoidict[line[0]].append(interface)
-                self.itondict[interface] = line[0]
-
-    def visualize(self):
-        G = nx.Graph()
-        for n in self.nodes:
-            G.add_node(n[0])
-        for r in self.rules:
-            srcinterface = self.findMatchingInterface(r[0], r[1])
-            dstnode = self.itondict[r[2]]
-            G.add_edge(r[0], dstnode, headlabel=srcinterface, taillabel=r[2])
-
-        pos = nx.spring_layout(G)
-        nx.draw_networkx(G, pos)
-        head_labels = nx.get_edge_attributes(G, "headlabel")
-        tail_labels = nx.get_edge_attributes(G, "taillabel")
-
-        nx.draw_networkx_edge_labels(
-            G, pos, label_pos=0.25, edge_labels=head_labels, rotate=False
-        )
-        nx.draw_networkx_edge_labels(
-            G, pos, label_pos=0.75, edge_labels=tail_labels, rotate=False
-        )
-
-        plt.axis("off")
-        plt.savefig("topo.png")
-
-    def findMatchingInterface(self, node, subnet):
-        for interface in self.ntoidict[node]:
-            if ipaddress.ip_address(interface) in ipaddress.ip_network(subnet):
-                return interface
+# class FileParser:
+#     """
+#     nodefile : the nodes filename. The file must be space-separated, each line describing a single node.
+#               node name at the first column, followed by the interfaces
+#
+#     rulefile : the rules filename. The file must be space-separated, each line descriing a single rule.
+#               node name at the first column, destination subnet in the second column, destination address at the third column.
+#     """
+#
+#     def __init__(self, nodefile=None, rulefile=None, linkfile=None):
+#         self.nodes = read_dsv(nodefile)
+#         self.rules = read_dsv(rulefile)
+#         self.ntoidict = dict()
+#         self.itondict = dict()
+#
+#         for line in self.nodes:
+#             self.ntoidict[line[0]] = []
+#             for interface in line[1:]:
+#                 self.ntoidict[line[0]].append(interface)
+#                 self.itondict[interface] = line[0]
+#
+#     def visualize(self):
+#         G = nx.Graph()
+#         for n in self.nodes:
+#             G.add_node(n[0])
+#         for r in self.rules:
+#             srcinterface = self.findMatchingInterface(r[0], r[1])
+#             dstnode = self.itondict[r[2]]
+#             G.add_edge(r[0], dstnode, headlabel=srcinterface, taillabel=r[2])
+#
+#         pos = nx.spring_layout(G)
+#
+#         nx.draw_networkx(G, pos)
+#         head_labels = nx.get_edge_attributes(G, "headlabel")
+#         tail_labels = nx.get_edge_attributes(G, "taillabel")
+#
+#         nx.draw_networkx_edge_labels(
+#             G, pos, label_pos=0.25, edge_labels=head_labels, rotate=False
+#         )
+#         nx.draw_networkx_edge_labels(
+#             G, pos, label_pos=0.75, edge_labels=tail_labels, rotate=False
+#         )
+#
+#         plt.axis("off")
+#         plt.savefig("topo.png")
+#
+#     def findMatchingInterface(self, node, subnet):
+#         for interface in self.ntoidict[node]:
+#             if ipaddress.ip_address(interface) in ipaddress.ip_network(subnet):
+#                 return interface
 
 
 def BFSParent(G, s):