forked from evanc577/sourcecatcher
-
Notifications
You must be signed in to change notification settings - Fork 0
/
web_server.py
133 lines (112 loc) · 3.64 KB
/
web_server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from flask import Flask, flash, redirect, render_template, request, session, abort
from find_match import find, stats
from werkzeug.utils import secure_filename
import requests
import urllib
import os
# Directory where uploaded images are stored while a search runs.
UPLOAD_FOLDER = 'uploads'

# Make sure the upload directory exists. An already-existing directory is
# fine; any other failure (e.g. missing permissions) is raised at startup
# instead of being silently swallowed and resurfacing on the first upload.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Image file extensions. Not referenced by any other code visible in this file.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Cap request bodies at 15 MiB.
app.config['MAX_CONTENT_LENGTH'] = 15 * 1024 * 1024
# NOTE(review): upload() calls flash(), which needs a session secret key;
# none is set in this file -- confirm it is configured elsewhere.
@app.route('/upload', methods=['GET', 'POST'])
def upload():
    """Search for the source of an uploaded image (POST) or a linked image (GET).

    POST expects a multipart form with a ``file`` field. The upload is saved
    under the configured upload folder, searched, and removed again once the
    results page has been rendered.

    GET reads the image URL from the ``link`` query parameter.

    Returns:
        The response produced by ``find_and_render``, or a redirect back to
        this URL when no usable file name was submitted.
    """
    if request.method != 'POST':
        return find_and_render('url', request.args.get('link'))

    f = request.files['file']
    filename = secure_filename(f.filename or '')
    # secure_filename() may strip a non-empty name down to '' (e.g. a name
    # made only of path separators); saving to that would target the upload
    # directory itself, so treat it the same as "no file selected".
    if not filename:
        flash('No selected file')
        return redirect(request.url)

    path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    try:
        f.save(path)
        return find_and_render('file', path)
    finally:
        # Always clean up the temporary upload, even when saving or
        # rendering fails, so files do not pile up in the upload folder.
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
@app.route('/')
def root():
    """Render the main page, searching for the ``link`` query parameter if present."""
    return find_and_render('url', request.args.get('link'))
def find_and_render(location, path):
    """Run an image search and render the results page.

    Args:
        location: 'url' when *path* is an image URL, 'file' when *path* is a
            path to an image on disk.
        path: The URL or file path to search for, or None to render the page
            without searching.

    Returns:
        The rendered 'test.html' template. Any error raised during the
        search is logged and the page is rendered with whatever results
        were gathered up to that point.
    """
    direct_link = None
    tweet_source = None
    # Embedded-tweet HTML for the first three distinct tweets; a slot stays
    # None when there are fewer matches or the embed could not be fetched.
    embeds = [None, None, None]
    num_photos, num_tweets, mtime = stats()

    if path is not None:
        try:
            if location not in ('url', 'file'):
                raise ValueError('unknown location: {!r}'.format(location))

            # Transposing the result of find() must yield (basename, tweet_id)
            # pairs -- presumably find() returns two parallel sequences;
            # verify against find_match.
            seen_ids = set()
            for basename, tweet_id in zip(*find(location, path)):
                if tweet_id in seen_ids:
                    continue
                # NOTE(review): these are overwritten on every distinct match,
                # so they end up describing the LAST one -- confirm intended.
                direct_link = 'https://pbs.twimg.com/media/{}'.format(basename)
                tweet_source = 'https://www.twitter.com/statuses/{}'.format(tweet_id)
                # len(seen_ids) is the number of distinct tweets handled so far.
                if len(seen_ids) < len(embeds):
                    embeds[len(seen_ids)] = get_embed(tweet_id)
                seen_ids.add(tweet_id)
        except Exception:
            # Best-effort: a failed search must not break the page, but keep
            # the traceback so the failure can be diagnosed.
            app.logger.exception('search failed for %s %r', location, path)

    kwargs = {
        'direct_link': direct_link,
        'tweet_source': tweet_source,
        'embed': embeds[0],
        'embed2': embeds[1],
        'embed3': embeds[2],
        'num_photos': num_photos,
        'num_tweets': num_tweets,
        'mtime': mtime,
    }
    if location == 'url':
        kwargs['link'] = path
    if path is not None:
        # Set whenever a search was requested, whether or not it matched.
        kwargs['nothing'] = True
    return render_template('test.html', **kwargs)
def add_result_title(html, tweet_source):
    """Wrap result HTML in a ``result`` div headed by a link to the tweet.

    Args:
        html: HTML fragment for the result body.
        tweet_source: URL of the tweet; used as both link target and link text.

    Returns:
        The HTML string: a ``result`` div containing a ``result_title`` div
        with the link, followed by *html*.
    """
    # The href value is quoted and the title div is closed before the body,
    # so the generated markup is well-formed (balanced quotes and divs).
    header = (
        '<div class="result">\n'
        '<div class="result_title">\n'
        '<a href="{0}">{0}</a>\n'
        '</div>\n'
    ).format(tweet_source)
    footer = '\n</div>'
    return header + html + footer
def get_embed(tweet_id, timeout=10):
    """Get the HTML for an embedded tweet.

    Args:
        tweet_id: ID of the tweet to embed.
        timeout: Seconds to wait for the oEmbed endpoint before giving up.

    Returns:
        The ``html`` field of the oEmbed JSON response, or None when the
        request fails or the response does not contain that field.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded.
    from urllib.parse import quote

    tweet_source = 'https://www.twitter.com/a/status/{}'.format(tweet_id)
    get_url = 'https://publish.twitter.com/oembed?url={}'.format(
        quote(tweet_source, safe=''))
    try:
        # Without a timeout a stalled connection would hang the request
        # handler indefinitely.
        r = requests.get(url=get_url, timeout=timeout)
        return r.json()['html']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Best-effort: network errors, non-JSON bodies and responses without
        # an 'html' field all mean "no embed available".
        return None
def remove_scripts(html):
    """Experimental: remove all ``<script>`` elements from an HTML string.

    Every span from an opening ``<script`` up to and including the next
    ``</script>`` is dropped, matching tag names case-insensitively. An
    opening tag with no closing tag removes everything up to the end of the
    string.

    Args:
        html: HTML text to clean.

    Returns:
        *html* with the script elements removed.
    """
    import re

    # Non-greedy match so each script ends at its own closing tag; ``\Z``
    # covers an unterminated script. The closing tag is only looked for
    # after the opening one, so a stray earlier ``</script>`` is left alone.
    return re.sub(
        r'<script.*?(?:</script>|\Z)',
        '',
        html,
        flags=re.IGNORECASE | re.DOTALL,
    )