Skip to content

Commit

Permalink
feat: refactoring code
Browse files Browse the repository at this point in the history
  • Loading branch information
Kremilly committed Oct 9, 2023
1 parent 0bd790d commit 3a57155
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 43 deletions.
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Linkscraper

<div align="center">
<img src="https://i.imgur.com/m12BVHm.png" align="center"/>
</div>

Requirements:

* Python >= 3.6 ([Download](https://www.python.org/downloads/))

Clone this repository.

```shell
Expand Down Expand Up @@ -226,7 +234,11 @@ python linkscraper -u https://example.com -a get-plugins -p screenshot -f screen

## Changelog

> Current version: ``2.0.1``
> Current version: ``2.0.2``
Minor changes

* Refactoring code

Fixes

Expand Down Expand Up @@ -279,6 +291,11 @@ Plugins added
* rich
* python-decouple

## Roadmap

* [ ] Implement a micro database key-value type ([TinyDB](https://tinydb.readthedocs.io/en/latest/) like)
* [ ] List possible PDF files found in a URL

## License

Code licensed under [MIT License](https://github.com/kremilly/linkscraper/blob/main/LICENSE)
10 changes: 5 additions & 5 deletions __main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

console = Console(record=True)

version = "2.0.1"
VERSION = "2.0.2"
parser = argparse.ArgumentParser()

parser.add_argument("-u", "--url", help="URL to scan", required=True)
Expand All @@ -21,7 +21,7 @@
parser.add_argument("-b", "--browser", help="Set browser to take screenshot", required=False)
parser.add_argument("-t", "--title", help="Set the title of the screenshot on Imgur", required=False)
parser.add_argument("-ssc", "--show-status-code", help="Show status code", required=False, default="false")
parser.add_argument("-version", "--version", help="Show current version", action="version", version=version)
parser.add_argument("-version", "--version", help="Show current version", action="version", version=VERSION)
parser.add_argument("-k", "--key", help="Set the API key for a plugin that needs it", required=False)
parser.add_argument("-smf", "--show-minify-files", help="Show only minified files", required=False, default="false")
parser.add_argument("-oel", "--only-external-links", help="Show only external links", required=False, default="false")
Expand All @@ -31,15 +31,15 @@
if __name__ == "__main__":
BASE_URL = args.url

if isURL(BASE_URL) != True:
if not is_url(BASE_URL):
console.print("[bold red]Error: URL is missing[/bold red]")
sys.exit(1)

if check_connection(BASE_URL) != True:
if not check_connection(BASE_URL):
console.print("[bold red]Error: connection is not established")
sys.exit(1)

run_home(BASE_URL, version)
run_home(BASE_URL, VERSION)

if not args.action or args.action == "get-core" or args.action == "core":
run_core(BASE_URL)
Expand Down
24 changes: 12 additions & 12 deletions core/download_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def download_js(url, minify_files, filter_data):
table.add_column("Size", style="blue")
table.add_column("Status")

createFolder(path)
create_folder(path)

for script in soup.find_all("script"):
if script.attrs.get("src"):
Expand All @@ -50,17 +50,17 @@ def download_js(url, minify_files, filter_data):

for script_url in list(set(links)):
text = requests.get(script_url).text
file_name = path + getRemoteFileName(script_url)
file_name = path + get_remote_file_size(script_url)

with open(file_name, 'w', encoding="utf-8") as f:
f.write(text)

total_files += 1

if os.path.exists(file_name):
table.add_row(getRemoteFileName(script_url), script_url, localFileSize(file_name), "[bold green]Download completed[/bold green]")
table.add_row(get_remote_file_size(script_url), script_url, local_file_size(file_name), "[bold green]Download completed[/bold green]")
else:
table.add_row(getRemoteFileName(script_url), script_url, localFileSize(file_name), "[bold red]Download failed[/bold red]")
table.add_row(get_remote_file_size(script_url), script_url, local_file_size(file_name), "[bold red]Download failed[/bold red]")

path = os.path.realpath(path)
os.startfile(path)
Expand All @@ -87,7 +87,7 @@ def download_css(url, minify_files, filter_data):
table.add_column("Size", style="blue")
table.add_column("Status")

createFolder(path)
create_folder(path)

for css in soup.find_all("link"):
if css.attrs.get("href"):
Expand All @@ -106,17 +106,17 @@ def download_css(url, minify_files, filter_data):

for css_url in list(set(links)):
text = requests.get(css_url).text
file_name = path + getRemoteFileName(css_url)
file_name = path + get_remote_file_size(css_url)

with open(file_name, 'w', encoding="utf-8") as f:
f.write(text)

total_files += 1

if os.path.exists(file_name):
table.add_row(getRemoteFileName(css_url), css_url, localFileSize(file_name), "[bold green]Download completed[/bold green]")
table.add_row(get_remote_file_size(css_url), css_url, local_file_size(file_name), "[bold green]Download completed[/bold green]")
else:
table.add_row(getRemoteFileName(css_url), css_url, localFileSize(file_name), "[bold red]Download failed[/bold red]")
table.add_row(get_remote_file_size(css_url), css_url, local_file_size(file_name), "[bold red]Download failed[/bold red]")

path = os.path.realpath(path)
os.startfile(path)
Expand All @@ -143,7 +143,7 @@ def download_images(url, filter_data):
table.add_column("Size", style="blue")
table.add_column("Status", style="bold green")

createFolder(path)
create_folder(path)

for img in soup.find_all("img"):
img_url = urljoin(url, img.attrs.get("src"))
Expand All @@ -156,17 +156,17 @@ def download_images(url, filter_data):

for img_url in list(set(links)):
img_data = requests.get(img_url).content
file_name = path + getRemoteFileName(img_url)
file_name = path + get_remote_file_size(img_url)

with open(file_name, 'wb') as handler:
handler.write(img_data)

total_files += 1

if os.path.exists(file_name):
table.add_row(getRemoteFileName(img_url), img_url, localFileSize(file_name), "[bold green]Download completed[/bold green]")
table.add_row(get_remote_file_size(img_url), img_url, local_file_size(file_name), "[bold green]Download completed[/bold green]")
else:
table.add_row(getRemoteFileName(img_url), img_url, localFileSize(file_name), "[bold red]Download failed[/bold red]")
table.add_row(get_remote_file_size(img_url), img_url, local_file_size(file_name), "[bold red]Download failed[/bold red]")

path = os.path.realpath(path)
os.startfile(path)
Expand Down
6 changes: 3 additions & 3 deletions core/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ def get_links(url, external_links, status_code, filter_data):
for link in soup.find_all('a'):
if link.get('href') != None:
if filter_data:
if isURL(link.get('href')) and find(link.get('href'), filter_data):
if is_url(link.get('href')) and find(link.get('href'), filter_data):
links.append(link.get('href'))
else:
if not external_links or external_links != "true":
if isURL(link.get('href')):
if is_url(link.get('href')):
links.append(link.get('href'))
else:
if isURL(link.get('href')) and find(get_hostname(link.get('href')), get_hostname(url)) != True:
if is_url(link.get('href')) and find(get_hostname(link.get('href')), get_hostname(url)) != True:
links.append(link.get('href'))

for link in list(set(links)):
Expand Down
6 changes: 3 additions & 3 deletions core/static_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def js_files(url, minify_files, filter_data, download):
links.append(script_url)

for script_url in list(set(links)):
table.add_row(getRemoteFileName(script_url), script_url)
table.add_row(get_remote_file_size(script_url), script_url)
total_files += 1

end_time = "{:.2f}".format(time.time() - start_time)
Expand Down Expand Up @@ -86,7 +86,7 @@ def css_files(url, minify_files, filter_data, download):
links.append(css_url)

for css_url in list(set(links)):
table.add_row(getRemoteFileName(css_url), css_url)
table.add_row(get_remote_file_size(css_url), css_url)
total_files += 1

end_time = "{:.2f}".format(time.time() - start_time)
Expand Down Expand Up @@ -119,7 +119,7 @@ def images_files(url, filter_data, download):
links.append(img_url)

for img_url in list(set(links)):
table.add_row(getRemoteFileName(img_url), img_url)
table.add_row(get_remote_file_size(img_url), img_url)
total_files += 1

end_time = "{:.2f}".format(time.time() - start_time)
Expand Down
20 changes: 10 additions & 10 deletions plugins/imgur.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,20 @@

console = Console(record=True)

def getTitle(title):
def get_title(title):
if not title:
return 'Screenshot made by Linkscraper'
else:
return title

def embedCode(imgur_code_img, direct_link, imgur_page, title):
def embed_code(imgur_code_img, direct_link, imgur_page, title):
console.print("-" * 60)
console.print("Embed codes")
console.print("-" * 60)

console.print(f'[italic yellow]Imgur Post[/italic yellow]: <blockquote class="imgur-embed-pub" lang="en" data-id="{imgur_code_img}"><a href="{removeExtension(imgur_page)}">{getTitle(title)}</a></blockquote><script async src="//s.imgur.com/min/embed.js" charset="utf-8"></script>')
console.print(f"[italic yellow]HTML[/italic yellow]: <img src='{direct_link}' alt='{getTitle(title)}'>")
console.print(f"[italic yellow]Markdown[/italic yellow]: ![{getTitle(title)}]({direct_link})")
console.print(f'[italic yellow]Imgur Post[/italic yellow]: <blockquote class="imgur-embed-pub" lang="en" data-id="{imgur_code_img}"><a href="{remove_extension(imgur_page)}">{get_title(title)}</a></blockquote><script async src="//s.imgur.com/min/embed.js" charset="utf-8"></script>')
console.print(f"[italic yellow]HTML[/italic yellow]: <img src='{direct_link}' alt='{get_title(title)}'>")
console.print(f"[italic yellow]Markdown[/italic yellow]: ![{get_title(title)}]({direct_link})")
console.print(f"[italic yellow]BBCode[/italic yellow]: [img]{direct_link}[/img]")

def plugin_imgur(file, key, title):
Expand All @@ -37,20 +37,20 @@ def plugin_imgur(file, key, title):
response = requests.request("POST", "https://api.imgur.com/3/image", headers = {
'Authorization': f"Client-ID {key}"
}, data = {
'image': toBase64(file),
'title': getTitle(title)
'image': to_base64(file),
'title': get_title(title)
})

callback = response.json()
if callback["success"] == True:
direct_link = callback['data']['link']
imgur_page = direct_link.replace("i.", "")
imgur_code_img = removeExtension(imgur_page).replace("https://imgur.com/", "")
imgur_code_img = remove_extension(imgur_page).replace("https://imgur.com/", "")

console.print(f"Imgur page: [bold green]{removeExtension(imgur_page)}[/bold green]")
console.print(f"Imgur page: [bold green]{remove_extension(imgur_page)}[/bold green]")
console.print(f"Link Direct: [bold green]{direct_link}[/bold green]")

embedCode(imgur_code_img, direct_link, imgur_page, title)
embed_code(imgur_code_img, direct_link, imgur_page, title)

console.print("-" * 60)
pyperclip.copy(direct_link)
Expand Down
2 changes: 1 addition & 1 deletion utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def isJSON(string):
except ValueError as e:
return False

def isURL(string, check_protocol = True):
def is_url(string, check_protocol = True):
url_pattern_check_protocol = "^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$"
url_pattern = "^[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$"

Expand Down
16 changes: 8 additions & 8 deletions utils/utils_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@

from utils.utils import *

def localFileSize(file):
def local_file_size(file):
file_size = os.stat(file)
return humanSize(file_size.st_size)

def remoteFileSize(url):
def remote_file_size(url):
try:
req_headers = requests.get(url)
return humanSize(
Expand All @@ -18,32 +18,32 @@ def remoteFileSize(url):
except:
return None

def toBase64(file):
def to_base64(file):
with open(file, "rb") as f:
output = base64.b64encode(f.read())

return output

def removeExtension(file):
def remove_extension(file):
return file.rsplit(".", 1)[0]

def getExtension(file):
def get_extension(file):
ext = os.path.splitext(file)

if ext != "" or ext != ".":
return ext
else:
return None

def getFileName(string):
def get_file_name(string):
name = os.path.split(string)[1]

if find(string, "?"):
return removeQuery(name)
else:
return name

def getRemoteFileName(url):
def get_remote_file_size(url):
a = urlparse(url)
basename = os.path.basename(a.path)

Expand All @@ -54,6 +54,6 @@ def getRemoteFileName(url):
if find(file, ".") and len(file) > 1:
return file

def createFolder(folder):
def create_folder(folder):
if os.path.isdir(folder) != True:
os.makedirs(folder)

0 comments on commit 3a57155

Please sign in to comment.