strs2weibo.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Activate the project virtualenv (Python 2 style, via execfile).
activate_this = 'venv/bin/activate_this.py'
execfile(activate_this, dict(__file__=activate_this))

from datetime import datetime
import re
from sqlite3 import dbapi2 as sqlite3
from urllib import urlretrieve
from urlparse import urljoin

from bs4 import BeautifulSoup
import requests
from weibo import APIClient

from config import (APP_KEY, APP_SECRET, ACCESS_TOKEN, EXPIRES_IN,
                    CALLBACK_URL, ROOT, API_KEY, DATABASE)
from proxy import random_headers, PROXY

__version__ = '0.1.0'
__author__ = 'holys <[email protected]>'
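
# Pipeline overview: parse_sourcelist() collects detail-page URLs from a
# listing page, parse_detail() extracts title/date/download URL from each,
# parse_douban() looks the title up via the Douban movie API, fetch_data()
# reads the prepared rows back from sqlite, and send_weibo() builds the
# status, attaches the poster image and posts it.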


def parse_sourcelist(session, url):
    """Parse a source-list page and return the file-detail URLs.

    `session` is an existing requests.Session instance.
    """
    thisid_re = re.compile(r"VodAdmin\/(.+?)\'")
    response = session.get(url)
    response.encoding = 'utf-8'
    detail_list = list(set(thisid_re.findall(response.text)))
    return [urljoin(ROOT, item) for item in detail_list]


def parse_detail(url):
    """Parse a detail page for title, upload date and download URL."""
    res = requests.get(url)
    res.encoding = 'utf-8'
    content = res.text
    soup = BeautifulSoup(content)
    title_re = re.compile(ur"节目名称:\s+(.+)?</td>", re.M)
    date_re = re.compile(ur"创建日期.\s(.+)?<")
    title = re.search(title_re, content).group(1)
    upload_date = re.search(date_re, content).group(1).split(' ')[0].strip()
    # Normalize the date, e.g. `2013-5-29` -> `2013-05-29`
    upload_date = datetime.strftime(datetime.strptime(upload_date, '%Y-%m-%d'),
                                    '%Y-%m-%d')
    download_url = soup.find_all('a')[2].get('href').encode('utf8')
    return (title, upload_date, download_url)


def parse_douban(title, api_key=API_KEY):
    """Query the Douban movie API for a title."""
    headers = random_headers()
    proxies = PROXY
    url = 'https://api.douban.com/v2/movie/search?q=%s&apikey=%s' % (title,
                                                                     api_key)
    result = requests.get(url, headers=headers, proxies=proxies).json()
    try:
        if result.get('msg'):
            print result.get('msg')
        if result.get('total') > 0:
            subject = result.get('subjects')[0]
            douban_title = subject.get('title')
            douban_url = subject.get('alt')
            douban_id = subject.get('id')
            lpic_url = subject.get('images').get('large').replace('\\', '')
            return (douban_title, douban_url, douban_id, lpic_url)
        else:
            print "%s not found in douban" % title
    except IndexError:
        print "Douban API failed, %s not found" % title


def count_page(url):
    """Count how many pages a given listing url has."""
    page_re = re.compile(ur"共<.+?>(\d{1,5})<")
    response = requests.get(url)
    response.encoding = 'utf-8'
    return int(re.search(page_re, response.text).group(1))


def retrieve_image(url):
    """Download an image and return an open file object for the local copy."""
    # urlretrieve() returns (local_filename, headers); open the local file.
    return open(urlretrieve(url)[0], 'rb')


def check_update(date):
    """Check whether the given date is today, i.e. there is new content."""
    format_date = datetime.strptime(date, '%Y-%m-%d')
    return format_date.date() == datetime.now().date()


def fetch_data(query, db=DATABASE):
    """Fetch data from the db to prepare the weibo message.

    Rows are expected as (douban_title, download_url, douban_url,
    lpic_url, douban_id); a movie may span several rows, one per
    download URL.
    """
    with sqlite3.connect(db) as conn:
        cursor = conn.cursor()
        cursor.execute(query)
        rows = cursor.fetchall()
        if not rows:
            print "Can't query data from db"
            return False
        if len(rows) > 1:
            douban_title = rows[0][0]
            douban_url = rows[0][2]
            lpic_url = rows[0][3]
            douban_id = rows[0][4]
            download_url = [row[1] for row in rows]
        else:
            douban_title, download_url, douban_url, lpic_url, douban_id = rows[0]
            download_url = [download_url]
        return douban_title, download_url, douban_url, lpic_url, douban_id


def weibo_upload(status, pic):
    """Tweet with an image. The resulting weibo looks like:

    +-----------------------------------------------------------+
    | +--------------+----------+--------------------+--------+ |
    | | #电影传送门# | 电影名称 | 下载地址URL1, URL2 | 豆瓣URL | |
    | +--------------+----------+--------------------+--------+ |
    | 电影海报(图片)                                            |
    +-----------------------------------------------------------+

    (topic | movie title | download URLs | douban URL, with the
    movie poster attached as the image)
    """
    client = APIClient(app_key=APP_KEY, app_secret=APP_SECRET,
                       redirect_uri=CALLBACK_URL)
    client.set_access_token(ACCESS_TOKEN, EXPIRES_IN)
    return client.statuses.upload.post(status=status, pic=pic)


def construct_status(topic, douban_title, douban_url, *download_url):
    """Construct the weibo message, shortening each download URL first."""
    client = APIClient(app_key=APP_KEY, app_secret=APP_SECRET,
                       redirect_uri=CALLBACK_URL)
    client.set_access_token(ACCESS_TOKEN, EXPIRES_IN)
    # Run every download URL through weibo's short_url API.
    download_url = [
        client.short_url.shorten.get(
            url_long=url).get('urls')[0].get('url_short').encode('utf8')
        for url in download_url]
    download_url = ' , '.join(download_url)
    # Template reads: "#topic# <<title>> download: URLs douban movie: URL"
    status = u"#%s#《%s》 下载:%s 豆瓣电影:%s" % (topic, douban_title,
                                                   download_url, douban_url)
    return status


def send_weibo(topic, query, update):
    """Send the weibo message and mark the movie as posted in the db."""
    data = fetch_data(query)
    if not data:
        return
    douban_title, download_url, douban_url, lpic_url, douban_id = data
    lpic = retrieve_image(lpic_url)
    status = construct_status(topic, douban_title, douban_url, *download_url)
    response = weibo_upload(status=status, pic=lpic)
    if response.get('created_at'):
        with sqlite3.connect(DATABASE) as conn:
            cursor = conn.cursor()
            cursor.execute(update, (douban_id,))
        print "\nWeibo sent successfully!\n", "douban_id:", douban_id
    elif response.get('error_code'):
        print response.get('error_code'), response.get('error')
    else:
        print "Unknown error!"