-
Notifications
You must be signed in to change notification settings - Fork 0
/
url-check
46 lines (38 loc) · 1.21 KB
/
url-check
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/python
# coding: utf-8
# URLs to check, one per line. Blank lines and surrounding whitespace are
# ignored.
urls = '''
https://google.com
'''
# Strip each entry and drop empty ones so that stray spaces or blank lines in
# the block above are never passed on as a URL.
urlslist = [entry.strip() for entry in urls.splitlines() if entry.strip()]
# Request headers passed to requests.get(). The browser identification has to
# be stored under the 'User-Agent' header name; under any other key it is sent
# as an unrelated header and the library's default User-Agent is used instead.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0'}
from bs4 import BeautifulSoup
import time, requests
# Delimiter wrapped around every extracted string so that trailing spaces or
# missing text are easy to spot in the output.
limiter = "'"


def _quoted(tag):
    """Return the text of ``tag`` wrapped in ``limiter``.

    ``tag`` is a BeautifulSoup element, or ``None`` when the element is not
    present in the page. A missing element yields an empty quoted string
    instead of raising ``AttributeError``.
    """
    content = tag.get_text() if tag is not None else ''
    return limiter + content + limiter


# Report layout per URL: the URL itself, one '<H2> ...' line per <h2> heading,
# then a blank separator line. The title and first <h1> are only printed.
# UTF-8 is requested explicitly so non-ASCII headings do not depend on the
# platform's default encoding.
with open('urlcheck.txt', 'w', encoding='utf-8') as the_file:
    for line in urlslist:
        if not line:
            continue  # skip blank entries
        try:
            # A timeout keeps one unresponsive host from hanging the whole run.
            r = requests.get(line, headers=headers, timeout=30)
        except requests.exceptions.RequestException as e:
            # Base class of the requests errors: covers SSL failures as well
            # as connection errors, timeouts and malformed URLs, so a single
            # bad URL does not abort the remaining ones.
            print('Error in line:', line)
            print('Error:', e)
            continue
        status_code = r.status_code
        print(line, status_code)
        soup = BeautifulSoup(r.text, features="lxml")
        title = _quoted(soup.title)
        h1 = _quoted(soup.find('h1'))
        print('Title:', title)
        print(' H1:', h1)
        the_file.write(line + '\n')
        for h2 in soup.find_all('h2'):
            h2 = _quoted(h2)
            print(' H2:', h2)
            the_file.write('<H2> ' + h2 + '\n')
        the_file.write('\n')
        # Pause between successful requests to avoid hammering the servers.
        time.sleep(1)