-
Notifications
You must be signed in to change notification settings - Fork 0
/
Testing.py
35 lines (29 loc) · 889 Bytes
/
Testing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import Methods as m
import Stanford
import pandas as pd
import re
# import main
# stop_words = m.read_stopwords()
# Load the test set from CSV, naming the columns explicitly.
# With header=1 pandas treats the file's second line as the header row and
# starts reading data after it; `names` then replaces those labels.
# NOTE(review): confirm the file really has an extra line above its header,
# otherwise the first data record is being discarded.
test_set = pd.read_csv(
    'venv/Data/TestSet3 v7.csv',
    names=[
        'ID', 'text', 'keywords', 'Person', 'Location',
        'Date', 'Organization', 'Nouns', 'Verbs', 'Synonyms',
    ],
    header=1,
)
print(len(test_set))
print(type(test_set))

# One list of keywords per record: stringify the cell, remove embedded
# newlines, then split on commas (the pieces are not whitespace-stripped).
keyword = [
    re.sub(r"\n", "", str(cell)).split(",")
    for cell in test_set['keywords']
]

# str() is applied to every cell, so non-string cells are kept in their
# string form rather than dropped.
text = [str(cell) for cell in test_set['text']]

# Print the text of the second loaded record.
print(text[1])
#
# for x in range(99):
#
# candidate_keywords = Stanford.extract_candidate_keywords(text[x])
# candidate_keywords = [m.lower() for m in candidate_keywords]
#
# keywords = list(dict.fromkeys(candidate_keywords)) # remove duplicates
# print(keywords)
# print(text[x], keyword[x])