-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrapeMessages.py
97 lines (76 loc) · 2.6 KB
/
scrapeMessages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import configparser
import json
import asyncio
from datetime import date, datetime
from telethon import TelegramClient
from telethon.errors import SessionPasswordNeededError
from telethon.tl.functions.messages import (GetHistoryRequest)
from telethon.tl.types import (
PeerChannel
)
# JSON encoder for the non-JSON-native values stored in message dicts
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that also serializes dates, datetimes and bytes.

    ``date``/``datetime`` values are written as ISO 8601 strings and
    ``bytes`` as a list of integer byte values. Every other type is
    deferred to ``json.JSONEncoder.default``, which raises ``TypeError``
    for objects it cannot serialize.
    """

    def default(self, o):
        """Return a JSON-serializable stand-in for *o*.

        Raises:
            TypeError: if *o* is not a date, datetime or bytes object
                (raised by the base encoder).
        """
        # datetime is a subclass of date, so this single check covers both.
        if isinstance(o, date):
            return o.isoformat()
        if isinstance(o, bytes):
            return list(o)
        return super().default(o)
# Load settings from config.ini (read() is given the relative path "config.ini")
config = configparser.ConfigParser()
config.read("config.ini")

# Pull each credential out of the [Telegram] section
_telegram_section = config['Telegram']
api_id = _telegram_section['api_id']
api_hash = str(_telegram_section['api_hash'])
phone = _telegram_section['phone']
username = _telegram_section['username']

# Build the shared client; `username` is passed as its first argument
client = TelegramClient(username, api_id, api_hash)
async def main(phone, *, limit=100, total_count_limit=0,
               output_path='channel_messages.json'):
    """Download a channel's message history and write it to a JSON file.

    Prompts on stdin for the channel to scrape, pages through its history
    with ``GetHistoryRequest`` (using the ID of the last message of each
    page as the offset for the next request) and dumps every message,
    converted with ``to_dict()``, to *output_path*.

    Args:
        phone: Phone number used to sign the client in.
        limit: Maximum number of messages requested per page.
        total_count_limit: Stop once this many messages were collected;
            ``0`` (the default) means no cap.
        output_path: Path of the JSON file to write.
    """
    # Pass the configured phone so start() does not have to prompt for it.
    await client.start(phone=phone)
    print("Client Created")

    # Fallback manual login in case the session is still not authorized.
    if not await client.is_user_authorized():
        await client.send_code_request(phone)
        try:
            await client.sign_in(phone, input('Enter the code: '))
        except SessionPasswordNeededError:
            # Account has two-step verification enabled: ask for the password.
            await client.sign_in(password=input('Password: '))

    # Stray whitespace would otherwise make a numeric ID look like a username.
    user_input_channel = input('enter entity(telegram URL or entity id):').strip()
    if user_input_channel.isdigit():
        entity = PeerChannel(int(user_input_channel))
    elif user_input_channel.startswith('-') and user_input_channel[1:].isdigit():
        # Negative ("marked", e.g. -100...) IDs are handed over as a plain int
        # so that Telethon resolves the peer type from the ID itself.
        entity = int(user_input_channel)
    else:
        entity = user_input_channel
    my_channel = await client.get_entity(entity)

    offset_id = 0
    all_messages = []
    while True:
        print("Current Offset ID is:", offset_id, "; Total Messages:", len(all_messages))

        # When a cap is set, never request more than what is still missing.
        page_size = limit
        if total_count_limit > 0:
            page_size = min(limit, total_count_limit - len(all_messages))

        history = await client(GetHistoryRequest(
            peer=my_channel,
            offset_id=offset_id,
            offset_date=None,
            add_offset=0,
            limit=page_size,
            max_id=0,
            min_id=0,
            hash=0,
        ))
        messages = history.messages
        if not messages:
            break

        all_messages.extend(message.to_dict() for message in messages)
        # The next page starts from the last message received in this one.
        offset_id = messages[-1].id

        if total_count_limit > 0 and len(all_messages) >= total_count_limit:
            break

    with open(output_path, 'w', encoding='utf-8') as outfile:
        json.dump(all_messages, outfile, cls=DateTimeEncoder)
# Drive main() to completion on the client's own event loop, inside the
# client's context manager.
with client:
    event_loop = client.loop
    event_loop.run_until_complete(main(phone))