We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Hi,
I had trouble using Parsero with HTTPS, so I made some small changes. This is the first time I've written Python, so I apologize if the code sucks...
Here is the diff:
diff --git a/parsero.py b/parsero.py old mode 100644 new mode 100755 index 4ee24ef..a5dabea --- a/parsero.py +++ b/parsero.py @@ -34,6 +34,7 @@ Author: class bcolors: OKGREEN = '\033[92m' + REDIR = '\033[37m' FAIL = '\033[91m' ENDC = '\033[0m' YELLOW = '\033[33m' @@ -54,6 +55,7 @@ if sys.version_info < (3, 0, 0): import urllib.request import argparse import time +import http.client try: import urllib3 @@ -76,15 +78,27 @@ def logo(): print(bcolors.YELLOW + hello + bcolors.ENDC) now = time.strftime("%c") -def conn_check(url, only200): - global pathlist +def conn_check(url, only200, https): + global pathlist, http pathlist = [] salida = 1 + + if https == True: + protocol = "https" + conn = http.client.HTTPSConnection(url) + else: + protocol = "http" + conn = http.client.HTTPConnection(url) + try: - for line in urllib.request.urlopen("http://" + url + "/robots.txt"): - lineStr = str(line, encoding='utf8') + conn.request("GET", "/robots.txt") + res = conn.getresponse() + data = str(res.read(), encoding='utf8') + datas = data.split('\n') + for line in datas: + lineStr = line path = lineStr.split(': /') - if "Disallow" == path[0]: + if ("Disallow" == path[0]) or ("Noindex" == path[0]): pathlist.append(path[1].replace("\n", "").replace("\r", "")) pathlist = list(set(pathlist)) try: @@ -99,21 +113,24 @@ def conn_check(url, only200): print("\n" + bcolors.FAIL + "Please, type a valid URL. This URL can't be resolved." 
+ bcolors.ENDC) print("\n" + bcolors.FAIL + "e.g: python3 parsero.py -u www.behindthefirewalls.com -o -sb" + bcolors.ENDC + "\n") salida = 0 - + http = urllib3.PoolManager() count = 0 count_ok = 0 - + for p in pathlist: - disurl = "http://" + url + '/' + p - r1 = http.request('GET', disurl, redirect=False, retries=5) + disurl = protocol+"://"+url+'/'+p + r1 = http.request('GET', disurl, redirect = False, retries = 5) + count = count + 1 if r1.status == 200: print(bcolors.OKGREEN + disurl + ' ' + str(r1.status) + ' ' + str(r1.reason) + bcolors.ENDC) count_ok = count_ok + 1 elif only200 == False: - print(bcolors.FAIL + disurl + ' ' + str(r1.status) + ' ' + str(r1.reason) + bcolors.ENDC) - count = count + 1 - + if r1.status >= 300 and r1.status < 400: + print(bcolors.REDIR + disurl + ' ' + str(r1.status) + ' ' + str(r1.reason) + bcolors.ENDC) + else: + print(bcolors.FAIL + disurl + ' ' + str(r1.status) + ' ' + str(r1.reason) + bcolors.ENDC) + count_int = int(count) count_ok_int = int(count_ok) @@ -127,21 +144,25 @@ def conn_check(url, only200): else: print('\n' + bcolors.FAIL + '[+] %i links have been analyzed but any them are available...' 
% count_int + bcolors.ENDC) -def search_bing(url, searchbing, only200): +def search_bing(url, searchbing, only200, https): + if https == True: + protocol = "https" + else: + protocol = "http" try: print("\nSearching the Disallows entries in Bing...\n") from bs4 import BeautifulSoup count = 0 for p in pathlist: - disurl = "http://" + url + '/' + p + disurl = protocol+"://" + url + '/' + p opener = urllib.request.build_opener() opener.addheaders = [('User-agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0')] url2 = "http://www.bing.com/search?q=site:" + disurl print(url2) page = opener.open(url2) - soup = BeautifulSoup(page) + soup = BeautifulSoup(page, 'lxml') http = urllib3.PoolManager() for cite in soup.findAll('cite'): @@ -152,7 +173,10 @@ def search_bing(url, searchbing, only200): if r2.status == 200: print(bcolors.OKGREEN + ' - ' + cite.text + ' ' + str(r2.status) + ' ' + str(r2.reason) + bcolors.ENDC) elif only200 == False: - print(bcolors.FAIL + ' - ' + cite.text + ' ' + str(r2.status) + ' ' + str(r2.reason) + bcolors.ENDC) + if r2.status >= 300 and r2.status < 400: + print(bcolors.REDIR + ' - ' + cite.text + ' ' + str(r2.status) + ' ' + str(r2.reason) + bcolors.ENDC) + else: + print(bcolors.FAIL + ' - ' + cite.text + ' ' + str(r2.status) + ' ' + str(r2.reason) + bcolors.ENDC) except UnicodeEncodeError: pass @@ -170,6 +194,7 @@ def main(): parse = argparse.ArgumentParser() parse.add_argument('-u', action='store', dest='url', help='Type the URL which will be analyzed') parse.add_argument('-o', action='store_true', dest='only200', help='Show only the "HTTP 200" status code') + parse.add_argument('-s', action='store_true', dest='https', help='Enable https') parse.add_argument('-sb', action='store_true', dest='searchbing', help='Search in Bing indexed Disallows') parse.add_argument('-f', action='store', dest='file', help='Scan a list of domains from a list') @@ -204,12 +229,13 @@ def main(): if url.find("http://") == 0: url = 
url.replace("http://", "") start_time = time.time() + https = args.https only200 = args.only200 searchbing = args.searchbing date(url) - conn_check(url, only200) + conn_check(url, only200, https) if searchbing == True: - search_bing(url, searchbing, only200) + search_bing(url, searchbing, only200, https) print("\nFinished in %0.2f seconds.\n" % (time.time() - start_time)) if __name__ == "__main__":
The text was updated successfully, but these errors were encountered:
No branches or pull requests
Hi,
I had trouble using Parsero with HTTPS, so I made some small changes. This is the first time I've written Python, so I apologize if the code sucks...
Here is the diff:
The text was updated successfully, but these errors were encountered: