diff --git a/README.md b/README.md
index 21fee24..3ce8af6 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,10 @@
# Linkscraper
+
+Requirements:
+
+* Python >= 3.6 ([Download](https://www.python.org/downloads/))
+
Clone this repository.
```shell
@@ -226,7 +234,11 @@ python linkscraper -u https://example.com -a get-plugins -p screenshot -f screen
## Changelog
-> Current version: ``2.0.1``
+> Current version: ``2.0.2``
+
+Minor changes
+
+* Code refactoring
Fixes
@@ -279,6 +291,26 @@ Plugins added
* rich
* python-decouple
+## Roadmap
+
+* [ ] Implement a micro key-value database ([TinyDB](https://tinydb.readthedocs.io/en/latest/)-like); see the sketch below
+* [ ] List PDF files found at the scanned URL
+
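+A minimal sketch of how the planned store could look, assuming a TinyDB-style API; the database file and record fields here are illustrative assumptions, not current Linkscraper code:
+
+```python
+from tinydb import TinyDB, Query
+
+db = TinyDB("linkscraper.json")  # hypothetical cache file
+
+# Cache a scraped link so a later run can reuse it
+db.insert({"url": "https://example.com/app.js", "type": "js"})
+
+# Look the record up again by URL
+Link = Query()
+print(db.search(Link.url == "https://example.com/app.js"))
+```
+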
## License
Code licensed under [MIT License](https://github.com/kremilly/linkscraper/blob/main/LICENSE)
diff --git a/__main__.py b/__main__.py
index 35e6ea5..005c289 100644
--- a/__main__.py
+++ b/__main__.py
@@ -8,7 +8,7 @@
console = Console(record=True)
-version = "2.0.1"
+VERSION = "2.0.2"
parser = argparse.ArgumentParser()
parser.add_argument("-u", "--url", help="URL to scan", required=True)
@@ -21,7 +21,7 @@
parser.add_argument("-b", "--browser", help="Set browser to take screenshot", required=False)
parser.add_argument("-t", "--title", help="Set title the screenshot on Imgur", required=False)
parser.add_argument("-ssc", "--show-status-code", help="Show status code", required=False, default="false")
-parser.add_argument("-version", "--version", help="Show current version", action="version", version=version)
+parser.add_argument("-version", "--version", help="Show current version", action="version", version=VERSION)
parser.add_argument("-k", "--key", help="Set the API key to use an plugin that is needs this", required=False)
parser.add_argument("-smf", "--show-minify-files", help="Show only minify files", required=False, default="false")
parser.add_argument("-oel", "--only-external-links", help="Show only external links", required=False, default="false")
@@ -31,15 +31,15 @@
if __name__ == "__main__":
BASE_URL = args.url
- if isURL(BASE_URL) != True:
+ if not is_url(BASE_URL):
console.print("[bold red]Error: URL is missing[/bold red]")
sys.exit(1)
- if check_connection(BASE_URL) != True:
+ if not check_connection(BASE_URL):
console.print("[bold red]Error: connection is not established")
sys.exit(1)
- run_home(BASE_URL, version)
+ run_home(BASE_URL, VERSION)
if not args.action or args.action == "get-core" or args.action == "core":
run_core(BASE_URL)
diff --git a/core/download_files.py b/core/download_files.py
index 4d92c0c..a1c5000 100644
--- a/core/download_files.py
+++ b/core/download_files.py
@@ -32,7 +32,7 @@ def download_js(url, minify_files, filter_data):
table.add_column("Size", style="blue")
table.add_column("Status")
- createFolder(path)
+ create_folder(path)
for script in soup.find_all("script"):
if script.attrs.get("src"):
@@ -50,7 +50,7 @@ def download_js(url, minify_files, filter_data):
for script_url in list(set(links)):
text = requests.get(script_url).text
- file_name = path + getRemoteFileName(script_url)
+        file_name = path + get_remote_file_name(script_url)
with open(file_name, 'w', encoding="utf-8") as f:
f.write(text)
@@ -58,9 +58,9 @@ def download_js(url, minify_files, filter_data):
total_files += 1
if os.path.exists(file_name):
- table.add_row(getRemoteFileName(script_url), script_url, localFileSize(file_name), "[bold green]Download completed[/bold green]")
+            table.add_row(get_remote_file_name(script_url), script_url, local_file_size(file_name), "[bold green]Download completed[/bold green]")
else:
- table.add_row(getRemoteFileName(script_url), script_url, localFileSize(file_name), "[bold red]Download failed[/bold red]")
+            table.add_row(get_remote_file_name(script_url), script_url, local_file_size(file_name), "[bold red]Download failed[/bold red]")
path = os.path.realpath(path)
os.startfile(path)
@@ -87,7 +87,7 @@ def download_css(url, minify_files, filter_data):
table.add_column("Size", style="blue")
table.add_column("Status")
- createFolder(path)
+ create_folder(path)
for css in soup.find_all("link"):
if css.attrs.get("href"):
@@ -106,7 +106,7 @@ def download_css(url, minify_files, filter_data):
for css_url in list(set(links)):
text = requests.get(css_url).text
- file_name = path + getRemoteFileName(css_url)
+        file_name = path + get_remote_file_name(css_url)
with open(file_name, 'w', encoding="utf-8") as f:
f.write(text)
@@ -114,9 +114,9 @@ def download_css(url, minify_files, filter_data):
total_files += 1
if os.path.exists(file_name):
- table.add_row(getRemoteFileName(css_url), css_url, localFileSize(file_name), "[bold green]Download completed[/bold green]")
+            table.add_row(get_remote_file_name(css_url), css_url, local_file_size(file_name), "[bold green]Download completed[/bold green]")
else:
- table.add_row(getRemoteFileName(css_url), css_url, localFileSize(file_name), "[bold red]Download failed[/bold red]")
+            table.add_row(get_remote_file_name(css_url), css_url, local_file_size(file_name), "[bold red]Download failed[/bold red]")
path = os.path.realpath(path)
os.startfile(path)
@@ -143,7 +143,7 @@ def download_images(url, filter_data):
table.add_column("Size", style="blue")
table.add_column("Status", style="bold green")
- createFolder(path)
+ create_folder(path)
for img in soup.find_all("img"):
img_url = urljoin(url, img.attrs.get("src"))
@@ -156,7 +156,7 @@ def download_images(url, filter_data):
for img_url in list(set(links)):
img_data = requests.get(img_url).content
- file_name = path + getRemoteFileName(img_url)
+        file_name = path + get_remote_file_name(img_url)
with open(file_name, 'wb') as handler:
handler.write(img_data)
@@ -164,9 +164,9 @@ def download_images(url, filter_data):
total_files += 1
if os.path.exists(file_name):
- table.add_row(getRemoteFileName(img_url), img_url, localFileSize(file_name), "[bold green]Download completed[/bold green]")
+            table.add_row(get_remote_file_name(img_url), img_url, local_file_size(file_name), "[bold green]Download completed[/bold green]")
else:
- table.add_row(getRemoteFileName(img_url), img_url, localFileSize(file_name), "[bold red]Download failed[/bold red]")
+            table.add_row(get_remote_file_name(img_url), img_url, local_file_size(file_name), "[bold red]Download failed[/bold red]")
path = os.path.realpath(path)
os.startfile(path)
diff --git a/core/scraper.py b/core/scraper.py
index 193c58f..438c7a4 100644
--- a/core/scraper.py
+++ b/core/scraper.py
@@ -31,14 +31,14 @@ def get_links(url, external_links, status_code, filter_data):
for link in soup.find_all('a'):
if link.get('href') != None:
if filter_data:
- if isURL(link.get('href')) and find(link.get('href'), filter_data):
+ if is_url(link.get('href')) and find(link.get('href'), filter_data):
links.append(link.get('href'))
else:
if not external_links or external_links != "true":
- if isURL(link.get('href')):
+ if is_url(link.get('href')):
links.append(link.get('href'))
else:
- if isURL(link.get('href')) and find(get_hostname(link.get('href')), get_hostname(url)) != True:
+                if is_url(link.get('href')) and not find(get_hostname(link.get('href')), get_hostname(url)):
links.append(link.get('href'))
for link in list(set(links)):
diff --git a/core/static_files.py b/core/static_files.py
index 73cfcd0..c5b34e7 100644
--- a/core/static_files.py
+++ b/core/static_files.py
@@ -47,7 +47,7 @@ def js_files(url, minify_files, filter_data, download):
links.append(script_url)
for script_url in list(set(links)):
- table.add_row(getRemoteFileName(script_url), script_url)
+        table.add_row(get_remote_file_name(script_url), script_url)
total_files += 1
end_time = "{:.2f}".format(time.time() - start_time)
@@ -86,7 +86,7 @@ def css_files(url, minify_files, filter_data, download):
links.append(css_url)
for css_url in list(set(links)):
- table.add_row(getRemoteFileName(css_url), css_url)
+        table.add_row(get_remote_file_name(css_url), css_url)
total_files += 1
end_time = "{:.2f}".format(time.time() - start_time)
@@ -119,7 +119,7 @@ def images_files(url, filter_data, download):
links.append(img_url)
for img_url in list(set(links)):
- table.add_row(getRemoteFileName(img_url), img_url)
+        table.add_row(get_remote_file_name(img_url), img_url)
total_files += 1
end_time = "{:.2f}".format(time.time() - start_time)
diff --git a/plugins/imgur.py b/plugins/imgur.py
index 17b478c..c2de3ff 100644
--- a/plugins/imgur.py
+++ b/plugins/imgur.py
@@ -8,20 +8,20 @@
console = Console(record=True)
-def getTitle(title):
+def get_title(title):
if not title:
return 'Screenshot made by Linkscraper'
else:
return title
-def embedCode(imgur_code_img, direct_link, imgur_page, title):
+def embed_code(imgur_code_img, direct_link, imgur_page, title):
console.print("-" * 60)
console.print("Embed codes")
console.print("-" * 60)
-    console.print(f'[italic yellow]Imgur Post[/italic yellow]: <a href="{imgur_page}">{getTitle(title)}</a>')
-    console.print(f"[italic yellow]HTML[/italic yellow]: <img src='{direct_link}' alt='{getTitle(title)}' />")
-    console.print(f"[italic yellow]Markdown[/italic yellow]: ![{getTitle(title)}]({direct_link})")
+    console.print(f'[italic yellow]Imgur Post[/italic yellow]: <a href="{imgur_page}">{get_title(title)}</a>')
+    console.print(f"[italic yellow]HTML[/italic yellow]: <img src='{direct_link}' alt='{get_title(title)}' />")
+    console.print(f"[italic yellow]Markdown[/italic yellow]: ![{get_title(title)}]({direct_link})")
console.print(f"[italic yellow]BBCode[/italic yellow]: [img]{direct_link}[/img]")
def plugin_imgur(file, key, title):
@@ -37,20 +37,20 @@ def plugin_imgur(file, key, title):
response = requests.request("POST", "https://api.imgur.com/3/image", headers = {
'Authorization': f"Client-ID {key}"
}, data = {
- 'image': toBase64(file),
- 'title': getTitle(title)
+ 'image': to_base64(file),
+ 'title': get_title(title)
})
callback = response.json()
if callback["success"] == True:
direct_link = callback['data']['link']
imgur_page = direct_link.replace("i.", "")
- imgur_code_img = removeExtension(imgur_page).replace("https://imgur.com/", "")
+ imgur_code_img = remove_extension(imgur_page).replace("https://imgur.com/", "")
- console.print(f"Imgur page: [bold green]{removeExtension(imgur_page)}[/bold green]")
+ console.print(f"Imgur page: [bold green]{remove_extension(imgur_page)}[/bold green]")
console.print(f"Link Direct: [bold green]{direct_link}[/bold green]")
- embedCode(imgur_code_img, direct_link, imgur_page, title)
+ embed_code(imgur_code_img, direct_link, imgur_page, title)
console.print("-" * 60)
pyperclip.copy(direct_link)
diff --git a/utils/utils.py b/utils/utils.py
index cd783ce..ce07db6 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -62,7 +62,8 @@ def isJSON(string):
except ValueError as e:
return False
-def isURL(string, check_protocol = True):
+def is_url(string, check_protocol=True):
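+    # Two patterns: the first requires an explicit http(s) scheme, the second also accepts bare hostnames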
url_pattern_check_protocol = "^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$"
url_pattern = "^[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$"
diff --git a/utils/utils_files.py b/utils/utils_files.py
index f5eadc1..2325783 100644
--- a/utils/utils_files.py
+++ b/utils/utils_files.py
@@ -5,11 +5,11 @@
from utils.utils import *
-def localFileSize(file):
+def local_file_size(file):
file_size = os.stat(file)
return humanSize(file_size.st_size)
-def remoteFileSize(url):
+def remote_file_size(url):
try:
req_headers = requests.get(url)
return humanSize(
@@ -18,16 +18,18 @@ def remoteFileSize(url):
except:
return None
-def toBase64(file):
+def to_base64(file):
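+    # Read the file and return its contents base64-encoded (bytes), e.g. for the Imgur upload payload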
with open(file, "rb") as f:
output = base64.b64encode(f.read())
return output
-def removeExtension(file):
+def remove_extension(file):
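+    # Strip only the final extension: "shot.imgur.png" -> "shot.imgur"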
return file.rsplit(".", 1)[0]
-def getExtension(file):
+def get_extension(file):
ext = os.path.splitext(file)
    if ext != "" and ext != ".":
@@ -35,7 +35,7 @@ def getExtension(file):
else:
return None
-def getFileName(string):
+def get_file_name(string):
name = os.path.split(string)[1]
if find(string, "?"):
@@ -43,7 +43,8 @@ def getFileName(string):
else:
return name
-def getRemoteFileName(url):
+def get_remote_file_name(url):
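+    # Derive a local file name from the URL's path component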
a = urlparse(url)
basename = os.path.basename(a.path)
@@ -54,6 +54,6 @@ def getRemoteFileName(url):
if find(file, ".") and len(file) > 1:
return file
-def createFolder(folder):
+def create_folder(folder):
if os.path.isdir(folder) != True:
os.makedirs(folder)