-
Notifications
You must be signed in to change notification settings - Fork 0
/
RoboMongo.py
58 lines (44 loc) · 1.54 KB
/
RoboMongo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import pymongo
from pymongo import MongoClient
import nltk
import re
# Connect to the MongoDB server on localhost and select the news-feed database.
client = MongoClient(host='localhost', port=27017)
db = client['stock_exchange_news_feeds']
# Cursor over every document in the OilStories collection. PyMongo cursors
# are lazy: the query is only sent once the cursor is first iterated.
col = db['OilStories'].find()
# Creation of the initial OilStories documents (MongoDB collection, not a table) -- disabled below.
#storyOne = {}
#storyOne['WallStreetJournal'] = 'Oil is up today'
#db.OilStories.insert(storyOne)
#storyTwo = {}
#storyTwo['NewYorkTimes'] = 'OPEC lowered their oil production targets'
#db.OilStories.insert(storyTwo)
#storyThree = {}
#storyThree['MarketWatch'] = 'Why oil could go much lower'
#db.OilStories.insert(storyThree)
#storyFour = {}
#storyFour['MotleyFool'] = 'Oil will rise over 100 again'
#db.OilStories.insert(storyFour)
#storyFive = {}
#storyFive['WashingtonPost'] = 'Oil tanker collides with cruise ship in Caribbean'
#db.OilStories.insert(storyFive)
# Store the Economist story as a single {source: story text} document.
storyEconomist = {
    'Economist': 'The oil conundrum. Plunging prices have neither halted oil production nor stimulated a surge in global growth.',
}
# Collection.insert() was deprecated in PyMongo 3 and removed in PyMongo 4.
# Upserting on the document's own content (instead of a plain insert) also
# stops every rerun of this script from adding another copy of the story.
db.OilStories.replace_one(dict(storyEconomist), storyEconomist, upsert=True)
#for key, value in d.iteritems():
#print key, '\t', value
# Collect the value of every field except "_id" from every document the
# cursor yields; these values are the story texts analysed further down.
storyContents = [
    text
    for document in col
    for field, text in document.items()
    if field != "_id"
]
# Run every story through NLTK: tokenize, part-of-speech tag, then chunk
# named entities, printing the chunk tree and the entity list per story.
for story in storyContents:
    tokenized = nltk.word_tokenize(story)
    tagged = nltk.pos_tag(tokenized)
    # binary=True makes the chunker label every entity subtree as "NE".
    namedEnt = nltk.ne_chunk(tagged, binary=True)
    print(namedEnt)
    # Walk the chunk tree instead of regex-matching its printed form: the
    # regex only captured the first token of a multi-word entity
    # ("New" out of "New York").
    entities = [
        ' '.join(word for word, _tag in chunk.leaves())
        for chunk in namedEnt.subtrees(lambda tree: tree.label() == 'NE')
    ]
    # Adjective tags are JJ, JJR and JJS. Reading the (word, tag) pairs
    # directly also keeps hyphenated or apostrophised words, which the old
    # regex over str(tagged) silently dropped.
    descriptives = [word for word, tag in tagged if tag.startswith('JJ')]
    print(entities)
    print('-----------')
    #print(descriptives)