main.py
# © BugHunterCodeLabs ™
# © bughunter0
# © Nuhman Pk
# 2021 - 2024
# Copyright - https://en.m.wikipedia.org/wiki/Fair_use
# /usr/bin/nuhmanpk/bughunter0
import os

from dotenv import load_dotenv
from pyrogram import Client, filters
from pyrogram.types import Message
from scraper import (
    all_audio_scraping,
    all_images_scraping,
    all_links_scraping,
    all_paragraph_scraping,
    all_pdf_scraping,
    all_video_scraping,
    extract_cookies,
    extract_local_storage,
    html_data_scraping,
    raw_data_scraping,
    extract_metadata,
    capture_screenshot,
    record_screen,
)
from crawler import crawl_web
from utils import OPTIONS, START_BUTTON, START_TEXT
load_dotenv()
bot_token = os.getenv("BOT_TOKEN")
api_id = os.getenv("API_ID")
api_hash = os.getenv("API_HASH")
CRAWL_LOG_CHANNEL = os.getenv('CRAWL_LOG_CHANNEL')
if bot_token is None or api_id is None or api_hash is None:
    raise ValueError(
        "Please set the BOT_TOKEN, API_ID, and API_HASH environment variables."
    )
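
# A minimal example .env read by load_dotenv() above; the values are placeholders.
# CRAWL_LOG_CHANNEL is optional here but is required by the "cbcrawl" handler below.
#
#   BOT_TOKEN=123456:your-bot-token
#   API_ID=12345
#   API_HASH=your-api-hash
#   CRAWL_LOG_CHANNEL=-100xxxxxxxxxx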

app = Client(
    "WebScrapperBot", bot_token=bot_token, api_id=int(api_id), api_hash=api_hash
)


@app.on_message(filters.command(["start"]))
async def start(_, message: Message):
    # Edit your start string here
    text = START_TEXT
    await message.reply_text(text=text, disable_web_page_preview=True, quote=True)


@app.on_callback_query()
async def cb_data(bot: Client, update):
    if update.data == "cbrdata":
        await raw_data_scraping(update)
    elif update.data == "cbhtmldata":
        await html_data_scraping(update)
    elif update.data == "cballlinks":
        await all_links_scraping(update)
    elif update.data == "cballparagraphs":
        await all_paragraph_scraping(update)
    elif update.data == "cballimages":
        await all_images_scraping(bot, update)
    elif update.data == "cballaudio":
        await all_audio_scraping(bot, update)
    elif update.data == "cballvideo":
        await all_video_scraping(bot, update)
    elif update.data == "cballpdf":
        await all_pdf_scraping(update)
    elif update.data == "cbmetadata":
        await extract_metadata(update)
    elif update.data == "cbcookies":
        await extract_cookies(update)
    elif update.data == "cblocalstorage":
        await extract_local_storage(update)
    elif update.data == "cbscreenshot":
        await capture_screenshot(update)
    elif update.data == "cbscreenrecord":
        await record_screen(update)
    elif update.data == "cdstoptrasmission":
        bot.stop_transmission()
    elif update.data == "cbcrawl":
        if CRAWL_LOG_CHANNEL:
            await crawl_web(bot, update)
        else:
            await update.message.reply("You must provide a Log Channel ID")
    else:
        await update.message.edit_text(
            text=START_TEXT, disable_web_page_preview=True, reply_markup=START_BUTTON
        )


@app.on_message(
    (filters.regex("https") | filters.regex("http") | filters.regex("www"))
    & filters.private
)
async def scrapping(bot, message):
    await send_message_with_options(message)


async def send_message_with_options(message):
    reply_markup = OPTIONS
    await message.reply_text("Choose an Option")
    await message.reply_text(
        message.text, reply_markup=reply_markup, disable_web_page_preview=True
    )
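

# START_TEXT, START_BUTTON and OPTIONS are imported from utils.py (not shown here).
# A minimal sketch, assuming OPTIONS is a Pyrogram InlineKeyboardMarkup whose
# callback_data values match the cb_data handler above; the button labels are guesses:
#
#   from pyrogram.types import InlineKeyboardMarkup, InlineKeyboardButton
#
#   OPTIONS = InlineKeyboardMarkup(
#       [
#           [
#               InlineKeyboardButton("Raw Data", callback_data="cbrdata"),
#               InlineKeyboardButton("HTML", callback_data="cbhtmldata"),
#           ],
#           [
#               InlineKeyboardButton("All Links", callback_data="cballlinks"),
#               InlineKeyboardButton("Crawl", callback_data="cbcrawl"),
#           ],
#       ]
#   )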


# Use soup.find_all('tag_name') to extract details for a specific tag.
"""
soup.title
# <title>This is Title</title>
soup.title.name
# u'title'
soup.title.string
# u'This is Title'
soup.title.parent.name
# u'head'
"""

print("Bot Running....")
app.run()