-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_mongodb.py
81 lines (67 loc) · 2.67 KB
/
test_mongodb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import nltk
from nltk.corpus import stopwords
from bson.objectid import ObjectId
from tripadvisor.entities import TaHotel
from review import Review,ReviewsDal
# English stop-word list from the NLTK stopwords corpus, frozen once at import
# time; get_reviews passes it to Review.assign_content.
StopWords = frozenset(stopwords.words("english"))
# Name handed to every ReviewsDal constructed in this script
# (NOTE(review): presumably the MongoDB database name; confirm in ReviewsDal).
DbName = "tripadvisor_train"
def get_reviews(hotel_file):
    """Yield one Review for every review of a TripAdvisor hotel file.

    hotel_file is passed unchanged to TaHotel. Each yielded Review gets the
    hotel's id as business_id, the source review's ratings, and content
    assigned from the source review's entire_content() together with the
    module-level StopWords set.

    This is a generator, so TaHotel is not constructed until the first item
    is requested.
    """
    hotel = TaHotel(hotel_file)
    # The position of a review inside the hotel is never used, so iterate the
    # reviews directly rather than through enumerate().
    for tareview in hotel.reviews:
        review = Review()
        review.business_id = hotel.id
        review.ratings = tareview.ratings
        review.assign_content(tareview.entire_content(), StopWords)
        yield review
def test_review_to_dict():
    """Print the to_dict() form of every review in one sample hotel file."""
    datafile = "tripadvisor/data/2514286.json"
    # Number the reviews from 1 for display.
    for position, review in enumerate(get_reviews(datafile), 1):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("********** {}-th review: ".format(position))
        print(review.to_dict())
def test_insert_db():
    """Insert every review of one sample hotel file through ReviewsDal."""
    datafile = "tripadvisor/data/536101.json"
    dal = ReviewsDal(DbName)
    # get_reviews is a generator, so insert_many receives a lazy stream of
    # Review objects rather than a list.
    dal.insert_many(get_reviews(datafile))
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("INSERTED INTO DATABASE")
def test_list_all_ids():
    """Print the result of list_ids() on a ReviewsDal built with DbName."""
    dal = ReviewsDal(DbName)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(dal.list_ids())
def print_review_by_review_id(reviewid):
    """Fetch one review by id and print its fields and sentences.

    reviewid is passed to ReviewsDal.find_by_review_id together with True
    (NOTE(review): the meaning of that flag is defined in ReviewsDal; confirm
    there). The review's id, business_id and ratings are printed first, then
    each sentence numbered from 1 with its raw text, its words when they are
    not None, and its aspect and sentiment.
    """
    dal = ReviewsDal(DbName)
    review = dal.find_by_review_id(reviewid, True)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("************ REVIEW <{}> ************".format(review.id))
    print("business_id: {}".format(review.business_id))
    print("ratings: {}".format(review.ratings))

    for number, sentence in enumerate(review.sentences, 1):
        print("\t[{}]: {}".format(number, sentence.raw))
        if sentence.words is not None:
            print("\t{}".format(sentence.words))
        # NOTE(review): aspect and sentiment are assumed to be printed for
        # every sentence, not only when words is set; confirm against the
        # intended nesting.
        print("\t ##### aspect: {}".format(sentence.aspect))
        print("\t ##### sentiment: {}".format(sentence.sentiment))
def test_update_aspect_sentiment():
    """Relabel two sentences of one fixed review, printing it before and after."""
    reviewid = ObjectId("57b0030160c0ff0f9b37a8fc")

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("=========================== before update")
    print_review_by_review_id(reviewid)

    # update
    dal = ReviewsDal(DbName)
    # Maps an integer key to an (aspect, sentiment) pair.
    # NOTE(review): the keys presumably select sentences within the review;
    # confirm their meaning and base in ReviewsDal.update_aspects_sentiments.
    new_aspects_sentiments = {
        3: ("Value", "Positive"),
        8: ("Service", "Negative"),
    }
    success = dal.update_aspects_sentiments(reviewid, new_aspects_sentiments)
    print("update successful? {}".format(success))

    print("=========================== after update")
    print_review_by_review_id(reviewid)
def test_list_sentences_by_aspect():
    """Print every sentence that ReviewsDal streams for the "Value" aspect."""
    aspect = "Value"
    # Use the shared DbName constant like every other helper in this script
    # instead of repeating the literal; the value is the same.
    dal = ReviewsDal(DbName)
    # Number the sentences from 1 for display.
    for number, sentence in enumerate(dal.sentences_stream_by_aspect(aspect), 1):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("\n********** [{}] {}: {}".format(number, sentence.aspect, sentence.sentiment))
        print(sentence.raw)
if __name__ == "__main__":
    # Manual test menu: the active call below is the one that runs; swap the
    # comment markers to run a different helper.
    # test_review_to_dict()
    # test_insert_db()
    # test_list_all_ids()
    target_review_id = ObjectId("57b3c96560c0ff08b163a0b3")
    print_review_by_review_id(target_review_id)
    # test_update_aspect_sentiment()
    # test_list_sentences_by_aspect()