-
Notifications
You must be signed in to change notification settings - Fork 0
/
top_stories.py
52 lines (38 loc) · 1.72 KB
/
top_stories.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Write a Python program that returns top stories from http://www.technewsworld.com
# The program should retrieve the most recent top 10 stories in the form: title, date, and URL.
# Since many third-party libraries are not supported on the CSE machines, your solution does not have
# to run on the CSE machines. However, if you use any additional libraries, they must also be available via
# Pip -> https://pip.pypa.io/en/stable/
# easy_install -> https://pypi.python.org/pypi/setuptools
# (Unofficial Binaries) -> http://www.lfd.uci.edu/~gohlke/pythonlibs/
# Tips: Beautiful Soup, urllib, and urllib2 are libraries I often use for getting information from web pages.
import urllib2
from bs4 import BeautifulSoup
def _parse_story(element, base_url):
    """Turn one ``story-list`` container into a ``(title, link, date)`` tuple.

    Args:
        element: a Beautiful Soup tag for one story container.
        base_url: URL of the page the container came from; used to resolve
            the story's ``href`` into an absolute link.

    Returns:
        A ``(title, link, date)`` tuple of strings, or ``None`` when the
        container has no anchor, no ``href``, no element with class
        ``title``, or an anchor whose text is empty.
    """
    # Imported locally so this block does not depend on a new file-level
    # import; ``urlparse`` is the Python 2 module providing ``urljoin``.
    from urlparse import urljoin

    anchor = element.a
    title_tag = element.find(class_="title")
    if anchor is None or title_tag is None:
        return None
    # Containers whose anchor has no text (or no target) are not stories.
    if anchor.get_text() == '' or not anchor.get("href"):
        return None

    date_tag = element.find(class_="date")
    # A missing date should not discard an otherwise valid story.
    date = date_tag.get_text() if date_tag is not None else ""

    # urljoin handles relative and absolute hrefs alike, where plain string
    # concatenation would yield a malformed URL for the latter.
    link = urljoin(base_url, anchor["href"])
    return title_tag.get_text(), link, date


def main(limit=10):
    """Print the top stories from the TechNewsWorld front page.

    Downloads the home page, walks every ``div`` with class ``story-list``
    in document order, and prints the title, link and date of the first
    ``limit`` usable entries.

    Args:
        limit: maximum number of stories to print. Defaults to 10, the
            number requested by the assignment text at the top of the file.

    NOTE(review): taking the first ``limit`` entries assumes the page lists
    its stories newest first -- confirm against the live markup.
    """
    url = "http://www.technewsworld.com"

    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    soup = BeautifulSoup(data, 'html.parser')

    # A list (not a dict keyed by title) keeps page order and does not
    # collapse two stories that happen to share a title.
    stories = []
    for element in soup.find_all("div", class_="story-list"):
        if len(stories) >= limit:
            break
        story = _parse_story(element, url)
        if story is not None:
            stories.append(story)

    for title, link, date in stories:
        # Single parenthesised argument: prints the same text under the
        # Python 2 print statement and the print() function.
        print(title + ": " + "\n\t" + "link: " + link + "\n\t" + "date: " + date + "\n\n")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()