-
Notifications
You must be signed in to change notification settings - Fork 0
/
parsing.py
71 lines (52 loc) · 1.97 KB
/
parsing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import urllib.request
from bs4 import BeautifulSoup
# Download the Naver search-result page for the hard-coded query and parse it
# into `soup`, the document tree that the rest of the script navigates.
SEARCH_URL = (
    "https://search.naver.com/search.naver"
    "?where=nexearch&sm=top_hty&fbm=1&ie=utf8"
    "&query=%EA%B0%95%EB%AF%BC%EC%A3%BC"
)

try:
    # The context manager closes the HTTP response as soon as parsing is done;
    # BeautifulSoup reads the whole body inside its constructor.
    with urllib.request.urlopen(SEARCH_URL) as response:
        soup = BeautifulSoup(response, 'html5lib')
except OSError as exc:
    # urlopen signals failures by raising (URLError/HTTPError are OSError
    # subclasses); the constructor never yields None, so a None check on
    # `soup` could not detect them. Nothing below can run without a parsed
    # page, so report the failure and stop with a non-zero exit status.
    print("error")
    raise SystemExit(1) from exc
# Parse the related-search keywords: the text of every <a> inside the first
# <dd> of the element whose id is "nx_related_keywords".
# `related_keywords` ends up as a list of link texts, or None when the
# expected markup is missing, so the reporting code can print an error
# instead of the script dying with AttributeError on a None lookup.
related_keywords_box = soup.find(id="nx_related_keywords")
related_keywords_soup = None
if related_keywords_box is not None:
    related_keywords_soup = related_keywords_box.find('dd')

if related_keywords_soup is None:
    related_keywords = None
else:
    # NOTE: `.string` can itself be None for a link that holds nested markup
    # rather than one plain text node; such entries are kept as-is.
    related_keywords = [
        link.string for link in related_keywords_soup.find_all('a')
    ]
# Parse the two "news topic" lists found under the element with id "nxfr_htp".
# The lists are located purely by position: each `.next_sibling.next_sibling`
# hop moves two nodes forward (presumably skipping a whitespace text node
# between tags -- verify against the live markup).
# Each result is a list of the "tit" span texts, or None when the expected
# structure is not there, so the reporting code can print an error.
try:
    news_topic_soup1 = soup.find(id="nxfr_htp").div
    news_topic_soup2 = news_topic_soup1.next_sibling.next_sibling
    news_topic_soup3 = news_topic_soup2.div.next_sibling.next_sibling
    news_topic_news = [
        span.string
        for span in news_topic_soup3.find_all("span", class_="tit")
    ]
except AttributeError:
    # A missing node anywhere in the chain shows up as an attribute lookup on
    # None (or on a bare text node); treat that as "section not found".
    news_topic_soup3 = None
    news_topic_news = None

try:
    # The second list sits two siblings after the first list's container.
    news_topic_soup4 = news_topic_soup3.next_sibling.next_sibling
    news_topic_entertain = [
        span.string
        for span in news_topic_soup4.find_all("span", class_="tit")
    ]
except AttributeError:
    news_topic_entertain = None
# Parse the ranking box with id "nxfr_ugrank":
#   target       -- text of the <h2> inside the box's first <div>
#   target_favor -- texts of the "tit" spans in the node two siblings after
#                   that <div>
# Both are set to None when the expected markup is missing, so the reporting
# code can print an error instead of the script crashing here.
try:
    # Look the header <div> up once and reuse it for both values.
    target_header = soup.find(id="nxfr_ugrank").div
    target_soup = target_header.next_sibling.next_sibling
    target = target_header.h2.string
    target_favor = [
        span.string for span in target_soup.find_all("span", class_="tit")
    ]
except AttributeError:
    # A missing node in the chain shows up as an attribute lookup on None
    # (or on a bare text node); treat that as "section not found".
    target = None
    target_favor = None
# Print every parsed section. A value of None is treated as a failed parse
# and reported as an error line; anything else is printed as-is.
# None is a singleton, so it is tested by identity (`is None`), not equality.
if related_keywords is None:
    print("연관검색어 : error")
else:
    print("연관검색어 :", related_keywords)

if news_topic_news is None:
    print("뉴스토픽 뉴스 : error")
else:
    print("뉴스토픽 뉴스 :", news_topic_news)

if news_topic_entertain is None:
    print("뉴스토픽 연예,스포츠 : error")
else:
    print("뉴스토픽 연예,스포츠 :", news_topic_entertain)

# This section needs both its heading text and its keyword list.
if target is None or target_favor is None:
    print("연령대별 검색어 : error")
else:
    print(target, ":", target_favor)