# app.py (76 lines, 60 loc, 2.49 KB)
# Forked from karan/Qnowledge.
import re
from flask import Flask, jsonify, render_template
from bs4 import BeautifulSoup
import requests
from purl import URL
# Flask application object; the @app.route decorators in this file register their views on it.
app = Flask(__name__)
@app.route('/', methods=['GET'])
def index():
    """Handle ``GET /`` by rendering the ``index.html`` template.

    Returns:
        The value produced by ``render_template('index.html')``.
    """
    landing_page = render_template('index.html')
    return landing_page
# Upper bound on how many answers are returned for a question.
_MAX_ANSWERS = 6

# Seconds to wait for the upstream page before giving up; without a timeout
# ``requests.get`` can block the worker indefinitely.
_REQUEST_TIMEOUT = 10

# Matches the ``id`` of the element whose children hold an answer's HTML.
_CONTAINER_ID = re.compile("(.*)_container")


def _node_text(node, default=''):
    """Return ``node.text``, or *default* when the lookup found nothing."""
    return default if node is None else node.text


def _parse_author(answer_div):
    """Extract the author's name and bio from one answer element.

    The name and the bio fall back independently (``'Anonymous'`` and ``''``),
    so a missing bio no longer discards a name that was found.
    """
    author = {'name': 'Anonymous', 'bio': ''}

    user_div = answer_div.find("div", {"class": "answer_user"})
    if user_div is None:
        return author
    wrapper = user_div.find("span", {"class": "answer_user_wrapper"})
    if wrapper is None:
        return author

    user_link = wrapper.find("a", {"class": "user"})
    if user_link is not None:
        author['name'] = user_link.string

    # The bio is read from the second "rep" span, when there is one.
    rep_spans = wrapper.find_all("span", {"class": "rep"})
    if len(rep_spans) > 1:
        author['bio'] = _node_text(rep_spans[1].find("span", {"class": "hidden"}))
    return author


def _parse_answer(answer_div, rank):
    """Build the dict describing one answer; *rank* is its 1-based position.

    Elements that are absent leave the defaults in place (``'-1'`` votes,
    empty answer text/HTML) instead of raising.
    """
    container = answer_div.find("div", {"id": _CONTAINER_ID})
    if container is None:
        answer_html = ''
    else:
        # The container's last child is deliberately left out of the HTML.
        answer_html = ''.join(str(node) for node in container.contents[:-1])

    return {
        'votes': _node_text(answer_div.find("span", {"class": "numbers"}), default='-1'),
        'rank': rank,
        'answer': _node_text(answer_div.find("div", {"class": "answer_content"})),
        'author': _parse_author(answer_div),
        'answer_html': answer_html,
    }


@app.route('/url=<path:q_link>')
def get_data(q_link):
    """Fetch a Quora question page and return its contents as JSON.

    Args:
        q_link: The question URL captured from the request path.

    Returns:
        The plain string ``'error, not quora'`` when the URL's domain is not
        ``quora.com`` / ``www.quora.com``; otherwise a JSON response with a
        ``question`` object (``url``, ``title``, ``topics``, ``details`` and,
        when the answer header could be parsed, ``answer_count``) and an
        ``answers`` list holding at most six entries.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the request times out.
    """
    url = URL(q_link)
    if url.domain() not in ['quora.com', 'www.quora.com']:
        return 'error, not quora'

    # Rebuild the URL so that only the path of the user-supplied link is kept.
    url = URL(
        scheme='https',
        host='www.quora.com',
        path=url.path(),
        query='share=1').as_string()

    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')

    question = {
        'url': url,
        'title': _node_text(soup.find("div", {"class": "question_text_edit"})),
        'topics': [topic.text for topic in soup.find_all("div", {"class": "topic_list_item"})],
        'details': _node_text(soup.find("div", {"class": "question_details_text"})),
    }
    answers = []
    divs = soup.find_all("div", {"class": "pagedlist_item"})

    # "Answers?" also accepts the singular form ("1 Answer"), which the
    # plural-only pattern rejected.
    header = _node_text(soup.find("div", {"class": "answer_header_text"})).strip()
    header_match = re.match(r'(\d+) Answers?', header)
    if header_match is None:
        # No parsable answer header: report the question without answers.
        return jsonify(question=question, answers=answers)

    count = int(header_match.group(1))
    question['answer_count'] = count

    # Below the cap, every pagedlist item except the last is read; otherwise
    # the cap applies. Clamp to the items actually present so a short page
    # cannot cause an IndexError.
    limit = len(divs) - 1 if count < _MAX_ANSWERS else _MAX_ANSWERS
    limit = max(0, min(limit, len(divs)))

    for rank, answer_div in enumerate(divs[:limit], start=1):
        answers.append(_parse_answer(answer_div, rank))

    return jsonify(question=question, answers=answers)
if __name__ == '__main__':
    # Start the server only when this file is executed as a script, not when it is imported.
    # NOTE(review): debug=True enables Flask's debug mode; confirm this entry
    # point is never used to serve production traffic.
    app.run(debug=True)