-
Notifications
You must be signed in to change notification settings - Fork 1
/
relo.py
36 lines (31 loc) · 1.36 KB
/
relo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import nltk
import relations
# Shared NLTK helpers, created once at import time and reused by get_relations().
# Sentence splitter: turns raw text into a list of sentence strings.
tokenizer = nltk.tokenize.punkt.PunktSentenceTokenizer()
# Lemmatizer: maps a token to its WordNet lemma.
wordnet_lemmatizer = nltk.stem.WordNetLemmatizer()
# Word tokenizer: despite the "punkt" in the name this is WordPunctTokenizer,
# which splits a sentence into word and punctuation tokens.
punkt_word_tokenizer = nltk.tokenize.WordPunctTokenizer()
def get_relations(text, ner_tags, lang_tag):
    """Count (entity, entity, relation) triples found sentence by sentence.

    Only English input (``lang_tag == 'en'``) is analysed; any other tag
    yields an empty dict.  Each sentence is tokenized, lower-cased and
    lemmatized, then scanned once:

    * a token found among the lemmatized ``relations.relations`` words
      becomes the sentence's relation (the last such token wins);
    * otherwise, a token found in ``ner_tags`` becomes the first entity if
      none has been seen yet, else it overwrites the second entity.

    A sentence contributes one count when it yields both entities and a
    relation.  Sentences with at least one of the three are also printed
    to stdout together with the (possibly partial) triple.

    Args:
        text: Raw text to analyse.
        ner_tags: Container tested with ``in`` against the lower-cased,
            lemmatized tokens of each sentence.
        lang_tag: Language code of ``text``.

    Returns:
        Dict mapping ``(first_entity, second_entity, relation)`` tuples to
        the number of sentences that produced them.
    """
    counts = {}
    if lang_tag != 'en':
        return counts

    sentences = tokenizer.tokenize(text)
    # Normalise the relation vocabulary the same way as the sentence tokens
    # so the membership test below compares like with like.
    relation_words = {
        wordnet_lemmatizer.lemmatize(name.lower())
        for name in relations.relations
    }

    for sentence in sentences:
        lemmas = [
            wordnet_lemmatizer.lemmatize(token.lower())
            for token in punkt_word_tokenizer.tokenize(sentence)
        ]

        first = second = relation = ''
        for lemma in lemmas:
            if lemma in relation_words:
                relation = lemma
                continue
            # NOTE(review): tokens are lower-cased before this lookup, so
            # capitalised entries in ner_tags would never match -- confirm
            # that callers pass lower-case tags.
            if lemma not in ner_tags:
                continue
            if first:
                second = lemma
            else:
                first = lemma

        triple = (first, second, relation)
        if any(triple):
            print(lemmas)
            print(triple)
        if all(triple):
            counts[triple] = counts.get(triple, 0) + 1

    return counts