-
Notifications
You must be signed in to change notification settings - Fork 0
/
scripts.py
68 lines (63 loc) · 2.59 KB
/
scripts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import json, csv, requests, os, shutil, re
from datetime import datetime
from pytz import timezone
def getStories(subcats,items=10):
    """Fetch stories from the registerguard.com JSON feed.

    Issues a GET request to the feed URL with ``items`` and ``subcats`` as
    query parameters, decodes the JSON body, and returns the value stored
    under its ``'stories'`` key.

    Args:
        subcats: Value sent as the ``subcats`` query parameter.
        items: Value sent as the ``items`` query parameter (default 10).

    Returns:
        The ``'stories'`` entry of the decoded JSON payload.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails.
        ValueError: If the response body cannot be decoded as JSON.
        KeyError: If the decoded payload has no ``'stories'`` key.
    """
    # TODO: raw_input for items ???
    payload = {'items': items, 'subcats': subcats}
    url = 'http://registerguard.com/csp/cms/sites/rg/feeds/json.csp'
    try:
        r = requests.get(url, params=payload)
    except requests.exceptions.RequestException:
        print('bad request: {0}?items={1}&subcats={2}'.format(url, items, subcats))
        # Re-raise the real cause: carrying on would only fail later with an
        # unrelated "variable referenced before assignment" error.
        raise
    print("got {}".format(r.url))
    try:
        # Named ``data`` so the imported ``json`` module is not shadowed.
        data = r.json()
    except ValueError:
        print('bad json: {0}?items={1}&subcats={2}'.format(url, items, subcats))
        raise
    print("got json")
    #hits = data['hits']
    return data['stories']
def storyCSV(stories,csvname='stories.csv'):
    """Write one CSV row per story to ``csvname``.

    The file gets a header row with the columns ``headline``, ``url``,
    ``author`` and ``pubdate``, filled from each story's ``'headline'``,
    ``'path'``, ``'byline'`` and ``'published'`` entries respectively.
    Progress messages are printed before and after writing.

    Args:
        stories: Iterable of mappings, each providing the four keys above.
        csvname: Path of the CSV file to create or overwrite.

    Raises:
        KeyError: If a story lacks one of the required keys.
    """
    # TODO: Error checking for csv extension???
    fieldnames = ['headline','url','author','pubdate']
    print("Writing {0}...".format(csvname))
    # newline='' leaves line endings to the csv module; the explicit UTF-8
    # encoding keeps non-ASCII headlines/bylines from failing (or changing
    # bytes) depending on the platform's default locale encoding.
    with open(csvname, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for story in stories:
            writer.writerow({
                'headline': story['headline'],
                'url': story['path'],
                'author': story['byline'],
                'pubdate': story['published'],
            })
    # NOTE: a disabled variant of this writer added a 'video' column (from
    # story['video']) and only wrote stories whose video value matched
    # r"youtube\.com|youtu\.be".
    print("{0} has been written.".format(csvname))
def createFolders(filePath):
    """Create the directory ``filePath`` (and any missing parents).

    Does nothing when the directory already exists.

    Args:
        filePath: Directory path to create; it is converted to ``str``.

    Raises:
        OSError: If the directory cannot be created, e.g. the path exists
            but is not a directory.
    """
    # exist_ok=True replaces the separate isdir() check, which could race
    # with another process creating the directory in between.
    os.makedirs(str(filePath), exist_ok=True)
def getDatetime(dateString):
    """Parse ``dateString`` into a datetime localized to America/Los_Angeles.

    Args:
        dateString: Timestamp text in ``'%Y-%m-%d %H:%M:%S'`` format.

    Returns:
        The parsed datetime after ``localize()`` has been applied by the
        ``'America/Los_Angeles'`` timezone object.

    Raises:
        ValueError: If ``dateString`` does not match the expected format.
    """
    naive = datetime.strptime(dateString, '%Y-%m-%d %H:%M:%S')
    # Attach the zone to the parsed (naive) value via the zone's localize().
    return timezone('America/Los_Angeles').localize(naive)
def getImage(url,imgPath,overname=""):
    """Download ``url`` and save the response body under ``imgPath``.

    The destination is ``<imgPath>/<overname>.jpg`` when ``overname`` is
    given, otherwise ``<imgPath>/<last path segment of url>``. The file is
    only written when the server answers with status 200; any other status
    is silently ignored.

    Args:
        url: Address of the image to download.
        imgPath: Directory the file is written into.
        overname: Optional base name (without extension) overriding the
            name taken from ``url``.
    """
    filename = url.split('/')[-1]
    if overname:
        path = '{0}/{1}.jpg'.format(imgPath,overname)
    else:
        path = '{0}/{1}'.format(imgPath,filename)
    # NOTE(review): verify=False turns off TLS certificate verification.
    # Kept as-is because callers may depend on it; confirm it is intended.
    rimg = requests.get(url, stream=True, verify=False)
    try:
        if rimg.status_code == 200:
            with open(path, 'wb') as f:
                # Have the raw stream undo gzip/deflate transfer encoding.
                rimg.raw.decode_content = True
                shutil.copyfileobj(rimg.raw, f)
    finally:
        # With stream=True the connection stays checked out until the body
        # is fully read or the response is closed; a non-200 reply is never
        # read, so close explicitly.
        rimg.close()