Skip to content

Commit

Permalink
Merge pull request #64 from rohithpr/master
Browse files Browse the repository at this point in the history
Added tests for question stats, answer stats and try_cast_int
  • Loading branch information
csu committed Feb 9, 2015
2 parents 3dfe327 + d394c29 commit 168be04
Show file tree
Hide file tree
Showing 8 changed files with 1,040 additions and 11 deletions.
2 changes: 1 addition & 1 deletion quora/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from user import User, Activity
from quora import Quora
from quora import Quora,try_cast_int
23 changes: 15 additions & 8 deletions quora/quora.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,22 @@
####################################################################
def try_cast_int(s):
    """Extract the first count found in ``s`` and return it as an int.

    Recognised forms:

    * a plain run of digits: ``'200 Upvotes'`` -> ``200``
    * thousands with a ``k``/``K`` suffix, optionally separated from the
      number by spaces: ``'2k'`` -> ``2000``, ``'2 K'`` -> ``2000``
    * fractional thousands: ``'2.3k'`` -> ``2300``, ``'2.35 K'`` -> ``2350``
      (fractional digits beyond the third are dropped so the result stays
      a whole number)

    Returns ``s`` unchanged when it is not a string or contains no digits,
    so callers may pass through values that are already numeric or missing.
    """
    # Group 1: whole part of a "k" number, group 2: its first fractional
    # part (if any), group 3: a plain integer with no "k" suffix.
    pattern = r'(?:([0-9]+)(?:\.([0-9]+))?(?:\.[0-9]+)*[ ]*[Kk])|([0-9]+)'
    try:
        match = re.search(pattern, s)
    except TypeError:
        # Non-string input (None, an int that is already a count, ...).
        return s
    if match is None:
        return s

    whole, fraction, plain = match.groups()
    if plain is not None:
        return int(plain)

    thousands = int(whole) * 1000
    if fraction is None:
        return thousands
    # Scale the fraction by digit position rather than by a fixed factor:
    # '3' -> 300, '35' -> 350, '05' -> 50.
    return thousands + int(fraction.ljust(3, '0')[:3])

def get_question_link(soup):
    """Return the absolute quora.com URL of the question linked in ``soup``.

    Looks up the first ``<a class="question_link">`` element via
    ``soup.find`` and prefixes its ``href`` with ``http://www.quora.com``.
    NOTE(review): assumes the href is site-relative (starts with '/') and
    that the anchor exists; a missing anchor raises AttributeError.
    """
    question_link = soup.find('a', attrs={'class': 'question_link'})
    return 'http://www.quora.com' + question_link.get('href')

def get_author(soup):
raw_author = soup.find('div', attrs = {'class' : 'author_info'}).next.get('href')
Expand Down Expand Up @@ -56,7 +63,7 @@ def scrape_one_answer(soup):
want_answers = soup.find('span', attrs = {'class' : 'count'}).string

try:
upvote_count = soup.find('a', attrs = {'class' : 'vote_item_link'}).find('span', attrs = {'class' : 'count'})
upvote_count = soup.find('a', attrs = {'class' : 'vote_item_link'}).find('span', attrs = {'class' : 'count'}).string
if upvote_count is None:
upvote_count = 0
except:
Expand Down Expand Up @@ -124,8 +131,8 @@ def scrape_question_stats(soup):
'answer_count' : try_cast_int(answer_count),
'question_text' : question_text.string,
'topics' : topics,
'question_details' : question_details.string,
'answer_wiki' : answer_wiki.string,
'question_details' : str(question_details),
'answer_wiki' : str(answer_wiki),
}
return question_dict

Expand Down
196 changes: 196 additions & 0 deletions tests/input_files/answer_1

Large diffs are not rendered by default.

321 changes: 321 additions & 0 deletions tests/input_files/question_1

Large diffs are not rendered by default.

441 changes: 441 additions & 0 deletions tests/input_files/question_2

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions tests/test_answer_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#coding=utf-8

from bs4 import BeautifulSoup
from nose import with_setup
from quora import Quora

EXPECTED_STATS_1 = {'want_answers': 75,
'views': 195,
'author': u'Mayur-P-R-Rohith',
'question_link': u'http://www.quora.com/What-are-some-mistakes-you-noticed-on-Friends',
'comment_count': 1,
'answer': '<div id="__w2_ri9P9zc_container">S4E20:<br/><br/>Notice the whiteboard.<br/><div><img class="landscape qtext_image zoomable_in zoomable_in_feed" master_h="768" master_src="http://qph.is.quoracdn.net/main-qimg-5f49eaf19b138cd9df79cffe5b727869?convert_to_webp=true" master_w="1366" src="http://qph.is.quoracdn.net/main-qimg-5c869ecdfd33c35392a4de2e72f4c05f?convert_to_webp=true"/></div>10:02- "get out"<br/><br/><div><img class="landscape qtext_image zoomable_in zoomable_in_feed" master_h="768" master_src="http://qph.is.quoracdn.net/main-qimg-5bef87d5441e2b640d781fb955b9ca95?convert_to_webp=true" master_w="1366" src="http://qph.is.quoracdn.net/main-qimg-3cecd38e3f868ab419fd47978ba1ce70?convert_to_webp=true"/></div>10:21- "Poop"<br/><br/><div><img class="landscape qtext_image zoomable_in zoomable_in_feed" master_h="768" master_src="http://qph.is.quoracdn.net/main-qimg-318c3847fe38bc356cc50c744930b9cf?convert_to_webp=true" master_w="1366" src="http://qph.is.quoracdn.net/main-qimg-91cb8136c6b29b9955cbc17abd1bfdfe?convert_to_webp=true"/></div>10:36- "Poop"<br/><br/><div><img class="landscape qtext_image zoomable_in zoomable_in_feed" master_h="768" master_src="http://qph.is.quoracdn.net/main-qimg-8b382517079a9767f42559733c6e3ffe?convert_to_webp=true" master_w="1366" src="http://qph.is.quoracdn.net/main-qimg-a755dc574444bf46927bf81ccb279f6b?convert_to_webp=true"/></div>10:43- "get out"<div class="container_boundary" id="__w2_ri9P9zc_container_boundary" style="margin:0px; padding:0px; height:0px; width:0px;"></div></div>',
'upvote_count': 3,
}

class TestAnswerScraper:
    """Checks scrape_one_answer against the stored answer_1 fixture."""

    def test_correct(self):
        q = Quora()
        # Close the fixture file deterministically; BeautifulSoup reads the
        # whole handle while constructing the soup, so it is safe to close
        # it before scraping.
        with open('tests/input_files/answer_1') as answer_page:
            soup = BeautifulSoup(answer_page)
        stats1 = q.scrape_one_answer(soup)

        assert stats1 == EXPECTED_STATS_1
11 changes: 11 additions & 0 deletions tests/test_helper_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import quora

def test_try_cast_int():
    """Check try_cast_int on plain, k-suffixed and HTML-wrapped counts."""
    # (raw input, expected integer) pairs, checked in order.
    cases = (
        ('200 Upvotes', 200),
        ('2k Upvotes', 2000),
        ('2 K Upvotes', 2000),
        ('2.3k Upvotes', 2300),
        ('2.3 K Upvotes', 2300),
        ('<span class="count">3</span>', 3),
    )
    for raw, expected in cases:
        assert quora.try_cast_int(raw) == expected

test_try_cast_int()
35 changes: 33 additions & 2 deletions tests/test_question_statistics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
from quora import Quora, Activity
#coding=utf-8

from bs4 import BeautifulSoup
from nose import with_setup
from quora import Quora

EXPECTED_STATS_1 = {
'want_answers': 1,
'question_text': u'Is there a proof of the Four Color Theorem that does not involve substantial computation?',
'topics': [u'Science, Engineering, and Technology', u'Science', u'Formal Sciences', u'Mathematics'],
'question_details': '<div class="question_details_text inline_editor_content">This is the full question: """The Four Colour Theorem is famous for being the first long-standing mathematical problem to be resolved using a computer program. The theorem was first conjectured in 1852 by Francis Guthrie, and after over a century of work by many famous mathematicians [36,28] (including De Morgan, Peirce, Hamilton, Cayley, Birkhoff, and Lebesgue), and many incorrect “proofs”, it was finally proved by Appel and Haken in 1976 [3]. This proof broke new ground because it involved using computer programs to carry out a gigantic case analysis, which could not, in any real sense, be performed by hand: it covered, quite literally, a billion cases."""... Can we thus say that the correctness of the proof depends on the correctness of the computer program which check all the billion cases… and that the correctness of the program guarantees the correct results for the billion+1th case…? This by itself constitutes a new proof technique(?), in which one can trust a computer program (or algorithm) and assume it correct as an axiom (the same way one assumes the truth of the 5-th postulate of Euclid)… This was my main issue the first time I came across this theorem and it still is… Any thoughts…? [This is my source: <span class="qlink_container"><a class="external_link" href="http://research.microsoft.com/en-us/um/people/gonthier/4colproof.pdf]" target="_blank">http://research.microsoft<wbr></wbr>.com/en...</a></span></div>',
'answer_count': 4,
'answer_wiki': '<div class="hidden" id="answer_wiki"><div id="ld_jqjkjx_2082"><div id="__w2_bZWlZkI_wiki"></div></div></div>'
}

EXPECTED_STATS_2 = {
'want_answers': 10,
'question_text': u'If space is 3 dimensional, can time also be 3 dimensional?',
'topics': [u'Science, Engineering, and Technology', u'Science', u'Physical Sciences', u'Physics', u'Theoretical Physics'],
'question_details': '<div class="question_details_text inline_editor_content"></div>',
'answer_count': 9,
'answer_wiki': '<div class="hidden" id="answer_wiki"><div id="ld_ibyjgu_158782"><div id="__w2_unO1fVs_wiki"></div></div></div>'
}

class TestQuestionStatistics:
q = Quora()
Expand All @@ -24,4 +45,14 @@ def test_exists(self):
def test_type(self):
for stat in self.test_stats:
assert isinstance(stat['answer_count'], (int, long))
assert isinstance(stat['want_answers'], (int, long))
assert isinstance(stat['want_answers'], (int, long))

class TestQuestionScraper:
    """Checks scrape_question_stats against the stored question fixtures."""

    def test_correct(self):
        q = Quora()
        fixtures = (
            ('tests/input_files/question_1', EXPECTED_STATS_1),
            ('tests/input_files/question_2', EXPECTED_STATS_2),
        )
        for path, expected in fixtures:
            # Close each fixture file deterministically; BeautifulSoup reads
            # the whole handle while constructing the soup.
            with open(path) as question_page:
                soup = BeautifulSoup(question_page)
            assert q.scrape_question_stats(soup) == expected

0 comments on commit 168be04

Please sign in to comment.