CAIDA · vgiotsas · Dec 17, 2017 · Dec 17, 2017 · Dec 17, 2017 · Dec 17, 2017
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+lg-logs/*
diff --git a/BgpParser.py b/BgpParser.py
diff --git a/Helper.py b/Helper.py
@@ -1,5 +1,6 @@
 __author__ = "Vasilis Giotsas"
 __email__ = "<[email protected]>"
+
 # This software is Copyright (C) 2015 The Regents of the University of
 # California. All Rights Reserved. Permission to copy, modify, and
 # distribute this software and its documentation for educational, research
@@ -22,7 +23,7 @@
 #
 # This software program and documentation are copyrighted by The Regents of
 # the University of California. The software program and documentation are
-# supplied “as is”, without any accompanying services from The Regents. The
+# supplied "as is", without any accompanying services from The Regents. The
 # Regents does not warrant that the operation of the program will be
 # uninterrupted or error-free. The end-user understands that the program
 # was developed for research purposes and is advised not to rely
@@ -35,18 +36,19 @@
 # THE POSSIBILITY OF SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY
 # DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
-# SOFTWARE PROVIDED HEREUNDER IS ON AN “AS IS” BASIS, AND THE UNIVERSITY OF
+# SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
 # CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
 # ENHANCEMENTS, OR MODIFICATIONS.
 
-from netaddr import *
 import os
+from netaddr import *
+
 
 class Helper:
     """
     Collection of helper classes used throughout the program
     """
-    
+
     @staticmethod
     def getIPNetwork(ipnet):
         """

diff --git a/HttpQueryHandler.py b/HttpQueryHandler.py
@@ -1,5 +1,7 @@
+
 __author__ = "Vasilis Giotsas"
 __email__ = "<[email protected]>"
+
 # This software is Copyright (C) 2015 The Regents of the University of
 # California. All Rights Reserved. Permission to copy, modify, and
 # distribute this software and its documentation for educational, research
@@ -22,7 +24,7 @@
 #
 # This software program and documentation are copyrighted by The Regents of
 # the University of California. The software program and documentation are
-# supplied $B!H(Bas is$B!I(B, without any accompanying services from The Regents. The
+# supplied "as is", without any accompanying services from The Regents. The
 # Regents does not warrant that the operation of the program will be
 # uninterrupted or error-free. The end-user understands that the program
 # was developed for research purposes and is advised not to rely
@@ -35,18 +37,20 @@
 # THE POSSIBILITY OF SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY
 # DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
-# SOFTWARE PROVIDED HEREUNDER IS ON AN $B!H(BAS IS$B!I(B BASIS, AND THE UNIVERSITY OF
+# SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
 # CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
 # ENHANCEMENTS, OR MODIFICATIONS.
 
-import urllib, urllib2, time, re
-from bs4 import BeautifulSoup 
+import urllib
+import urllib2
+import time
+import re
+from bs4 import BeautifulSoup
 from HTMLParser import HTMLParser
 from pprint import pprint
 import sys
 import json
 import httplib
-import random
 import socket
 from StringIO import StringIO
 import pycurl
@@ -59,11 +63,14 @@ class MLStripper(HTMLParser):
     """
     Methods to strip html tags from string
     """
+
     def __init__(self):
         self.reset()
         self.fed = []
+
     def handle_data(self, d):
         self.fed.append(d)
+
     def get_data(self):
         return ' '.join(self.fed)
 
@@ -72,6 +79,7 @@ class HttpQueryHandler(object):
     """
     Methods to handle issue of HTTP queries and parsing of the responses
     """
+
     def __init__(self):
         pass
 
@@ -84,12 +92,12 @@ def strip_tags(html):
     @staticmethod
     def remove_break_line(html_txt):
         result = ""
-        tokens = re.split("<br>|<br />|<br/>",html_txt)
+        tokens = re.split("<br>|<br />|<br/>", html_txt)
         for token in tokens:
             result = result + token.strip() + "\n"
-        result = result.replace("&nbsp;","")
+        result = result.replace("&nbsp;", "")
         return result
-         
+
     @staticmethod
     def send_http_request(params, url, referrer, verb):
         user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:17.0) Gecko/20100101 Firefox/17'
@@ -105,9 +113,9 @@ def send_http_request(params, url, referrer, verb):
                 if url == "http://noc.ucomline.net/lg1":
                     headers.append("Cookie: PHPSESSID=199d19597747f15a4d778d68b56d4ab7")
                 post_fields = urllib.urlencode(params)
-                pprint( post_fields)
+                pprint(post_fields)
                 curl_connector.setopt(pycurl.HTTPHEADER, headers)
-                curl_connector.setopt(curl_connector.URL,  url)
+                curl_connector.setopt(curl_connector.URL, url)
                 curl_connector.setopt(curl_connector.POSTFIELDS, post_fields)
             elif verb == "get":
                 headers = ['User-Agent: ' + user_agent,
@@ -116,7 +124,7 @@ def send_http_request(params, url, referrer, verb):
                            'Cache-Control: max-age=0']
                 url += "?"
                 counter = 1
-                for key,value in params.items():
+                for key, value in params.items():
                     counter += 1
                     if key == "zeroparam":
                         url += value
@@ -138,12 +146,11 @@ def send_http_request(params, url, referrer, verb):
             content = storage.getvalue()
         except pycurl.error, e:
             error_code, error_text = e.args
-            print 'We got an error. Code: %s, Text:%s'%(error_code, error_text)
+            print 'We got an error. Code: %s, Text:%s' % (error_code, error_text)
             if str(error_code) != "18":
                 return -1
             else:
                 content = storage.getvalue()
-        #print content
         return content
 
     @staticmethod
@@ -172,15 +179,18 @@ def send_http_request_alt(params, url, referrer, verb):
                                'Connection': 'keep-alive'}
                     url += "?"
                     counter = 1
-                    for key,value in params.items():
+                    for key, value in params.items():
                         counter += 1
                         if key == "zeroparam":
                             url += value
+                        elif key == "lg_query_arg":
+                            continue
                         else:
                             url += key + "=" + value
-                        if counter <= len(params):
+                        if counter <= len(params) - 1:
                             url += "&"
                     url = url.replace(" ", "%20")
+                    print url
                     req = urllib2.Request(url, None, headers)
                 response = urllib2.urlopen(req, timeout=timeout)
                 try:
@@ -203,7 +213,7 @@ def send_http_request_alt(params, url, referrer, verb):
         return the_page
 
     @classmethod
-    def parse_html(cls, html_text, lg_html):
+    def parse_html(cls, html_text, lg_html, asn):
         """
         Parse the HTML of the reply to extract the LG output
         """
@@ -215,24 +225,47 @@ def parse_html(cls, html_text, lg_html):
         insert_br = lg_html["insertbr"]
         contents = ""
         try:
+            soup = BeautifulSoup(html_text, "html.parser")
             if html_tag == "result":
-                soup = BeautifulSoup(html_text)
                 pre = soup.findAll("div", {"class": "result"})
                 pre = pre[1]
                 reply = ''.join(str(pre.contents))
-                contents = BeautifulSoup(reply)
+                contents = BeautifulSoup(reply, "html.parser")
             elif html_tag == "json":
+                decoded_html = None
                 decoded = json.loads(html_text)
                 if 'lg' in decoded:
                     contents = decoded['lg']
+                elif 'title' in decoded and "rs1.ripn.net" in decoded['title']:
+                    if 'txt' in decoded:
+                        if "BGP information for neighbor" in decoded['txt'][0]:
+                            decoded_html = '\n'.join(decoded['txt'][14:])
+                        else:
+                            decoded_html = '\n'.join(decoded['txt'])
+                        sanitized_html = decoded_html.replace("<t>", "\t")
+                        soup = BeautifulSoup(sanitized_html, "html.parser")
+                        # html_pretty = soup.prettify()
+                        # soup = BeautifulSoup(html_pretty, "html.parser")
+                        return soup.get_text()
+                else:
+                    contents = decoded
+            elif html_tag == "table":
+                if html_id != "":
+                    table_rows = soup.find("table", {"id": html_id}).find_all('tr')
+                else:
+                    table_rows = soup.find_all("table")[tag_index].find_all('tr')
+                for tr in table_rows:
+                    if len(contents) == 0:
+                        contents += tr.text.replace("\n", "\t")
+                    else:
+                        contents += "\n" + tr.text.replace("\n", "\t")
             elif tag_index == -1:
                 contents = cls.remove_break_line(html_text)
             elif tag_index == -2:
                 contents = html_text
             else:
-                soup = BeautifulSoup(html_text)
                 html_pretty = soup.prettify()
-                soup = BeautifulSoup(html_pretty)
+                soup = BeautifulSoup(html_pretty, "html.parser")
                 try:
                     if html_class == "" and html_id == "":
                         pre = soup.findAll(html_tag)[tag_index]
@@ -250,18 +283,23 @@ def parse_html(cls, html_text, lg_html):
                             contents += "\n" + str(content.strip())
                         else:
                             contents += str(content.strip())
-                    contents = BeautifulSoup(contents)
+                    contents = BeautifulSoup(contents, "html.parser")
                     contents = cls.remove_break_line(contents.encode('utf-8').strip())
                 else:
-                    if len(pre.contents) > content_index:
+                    if asn == "59900":
+                        pre = soup.find_all(html_tag)[tag_index]
+                        pre = str(pre).replace("<br>", "\n")
+                        pre = str(pre).replace("\r", "\n")
+                        soup = BeautifulSoup(pre, 'html.parser')
+                        contents = soup.text
+                    elif len(pre.contents) > content_index:
                         contents = pre.contents[content_index]
                 contents = cls.remove_break_line(contents.encode('utf-8').strip())
         except AttributeError:
             return -1
 
         # specific cases
         if html_tag == "table" and tag_index == 0:
-            contents = contents.replace("<tr>","\n")
-            contents = contents.replace("</tr>","")
+            contents = contents.replace("<tr>", "\n")
+            contents = contents.replace("</tr>", "")
         return contents
-