Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

https support #11

Open
gwen001 opened this issue Nov 8, 2016 · 0 comments
Open

https support #11

gwen001 opened this issue Nov 8, 2016 · 0 comments

Comments

@gwen001
Copy link

gwen001 commented Nov 8, 2016

Hi,

I had trouble using Parsero with https, so I made some small changes. It's the first time I've written Python, so I apologize if the code sucks...

Here is the diff:

diff --git a/parsero.py b/parsero.py
old mode 100644
new mode 100755
index 4ee24ef..a5dabea
--- a/parsero.py
+++ b/parsero.py
@@ -34,6 +34,7 @@ Author:
 
 class bcolors:
     OKGREEN = '\033[92m'
+    REDIR = '\033[37m'
     FAIL = '\033[91m'
     ENDC = '\033[0m'
     YELLOW = '\033[33m'
@@ -54,6 +55,7 @@ if sys.version_info < (3, 0, 0):
 import urllib.request
 import argparse
 import time
+import http.client
 
 try:
     import urllib3
@@ -76,15 +78,27 @@ def logo():
     print(bcolors.YELLOW + hello + bcolors.ENDC)
     now = time.strftime("%c")
 
-def conn_check(url, only200):
-    global pathlist
+def conn_check(url, only200, https):
+    global pathlist, http
     pathlist = []
     salida = 1
+
+    if https == True:
+        protocol = "https"
+        conn = http.client.HTTPSConnection(url)
+    else:
+        protocol = "http"
+        conn = http.client.HTTPConnection(url)
+    
     try:
-        for line in urllib.request.urlopen("http://" + url + "/robots.txt"):
-            lineStr = str(line, encoding='utf8')
+        conn.request("GET", "/robots.txt")
+        res = conn.getresponse()
+        data = str(res.read(), encoding='utf8')
+        datas = data.split('\n')
+        for line in datas:
+            lineStr = line
             path = lineStr.split(': /')
-            if "Disallow" == path[0]:
+            if ("Disallow" == path[0]) or ("Noindex" == path[0]):
                 pathlist.append(path[1].replace("\n", "").replace("\r", ""))
                 pathlist = list(set(pathlist))
             try:
@@ -99,21 +113,24 @@ def conn_check(url, only200):
         print("\n" + bcolors.FAIL + "Please, type a valid URL. This URL can't be resolved." + bcolors.ENDC)
         print("\n" + bcolors.FAIL + "e.g: python3 parsero.py -u www.behindthefirewalls.com -o -sb" + bcolors.ENDC + "\n")
         salida = 0
-
+    
     http = urllib3.PoolManager()
     count = 0
     count_ok = 0
-
+    
     for p in pathlist:
-        disurl = "http://" + url + '/' + p
-        r1 = http.request('GET', disurl, redirect=False, retries=5)
+        disurl = protocol+"://"+url+'/'+p
+        r1 = http.request('GET', disurl, redirect = False, retries = 5)
+        count = count + 1
         if r1.status == 200:
             print(bcolors.OKGREEN + disurl + ' ' + str(r1.status) + ' ' + str(r1.reason) + bcolors.ENDC)
             count_ok = count_ok + 1
         elif only200 == False:
-            print(bcolors.FAIL + disurl + ' ' + str(r1.status) + ' ' + str(r1.reason) + bcolors.ENDC)
-        count = count + 1
-
+            if r1.status >= 300 and r1.status < 400:
+                print(bcolors.REDIR + disurl + ' ' + str(r1.status) + ' ' + str(r1.reason) + bcolors.ENDC)
+            else:
+                print(bcolors.FAIL + disurl + ' ' + str(r1.status) + ' ' + str(r1.reason) + bcolors.ENDC)
+    
     count_int = int(count)
     count_ok_int = int(count_ok)
 
@@ -127,21 +144,25 @@ def conn_check(url, only200):
         else:
             print('\n' + bcolors.FAIL + '[+] %i links have been analyzed but any them are available...' % count_int + bcolors.ENDC)
 
-def search_bing(url, searchbing, only200):
+def search_bing(url, searchbing, only200, https):
+    if https == True:
+        protocol = "https"
+    else:
+        protocol = "http"
     try:
         print("\nSearching the Disallows entries in Bing...\n")
         from bs4 import BeautifulSoup
 
         count = 0
         for p in pathlist:
-            disurl = "http://" + url + '/' + p
+            disurl = protocol+"://" + url + '/' + p
             opener = urllib.request.build_opener()
             opener.addheaders = [('User-agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0')]
             url2 = "http://www.bing.com/search?q=site:" + disurl
             print(url2)
 
             page = opener.open(url2)
-            soup = BeautifulSoup(page)
+            soup = BeautifulSoup(page, 'lxml')
 
             http = urllib3.PoolManager()
             for cite in soup.findAll('cite'):
@@ -152,7 +173,10 @@ def search_bing(url, searchbing, only200):
                         if r2.status == 200:
                             print(bcolors.OKGREEN + ' - ' + cite.text + ' ' + str(r2.status) + ' ' + str(r2.reason) + bcolors.ENDC)
                         elif only200 == False:
-                            print(bcolors.FAIL + ' - ' + cite.text + ' ' + str(r2.status) + ' ' + str(r2.reason) + bcolors.ENDC)
+                            if r2.status >= 300 and r2.status < 400:
+                                print(bcolors.REDIR + ' - ' + cite.text + ' ' + str(r2.status) + ' ' + str(r2.reason) + bcolors.ENDC)
+                            else:
+                                print(bcolors.FAIL + ' - ' + cite.text + ' ' + str(r2.status) + ' ' + str(r2.reason) + bcolors.ENDC)
                 except UnicodeEncodeError:
                     pass
 
@@ -170,6 +194,7 @@ def main():
     parse = argparse.ArgumentParser()
     parse.add_argument('-u', action='store', dest='url', help='Type the URL which will be analyzed')
     parse.add_argument('-o', action='store_true', dest='only200', help='Show only the "HTTP 200" status code')
+    parse.add_argument('-s', action='store_true', dest='https', help='Enable https')
     parse.add_argument('-sb', action='store_true', dest='searchbing', help='Search in Bing indexed Disallows')
     parse.add_argument('-f', action='store', dest='file', help='Scan a list of domains from a list')
 
@@ -204,12 +229,13 @@ def main():
         if url.find("http://") == 0:
             url = url.replace("http://", "")
         start_time = time.time()
+        https = args.https
         only200 = args.only200
         searchbing = args.searchbing
         date(url)
-        conn_check(url, only200)
+        conn_check(url, only200, https)
         if searchbing == True:
-            search_bing(url, searchbing, only200)
+            search_bing(url, searchbing, only200, https)
         print("\nFinished in %0.2f seconds.\n" % (time.time() - start_time))
 
 if __name__ == "__main__":
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant