-
Notifications
You must be signed in to change notification settings - Fork 0
/
experiment_wordnet.py
67 lines (54 loc) · 1.95 KB
/
experiment_wordnet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pandas as pd
from nltk.corpus import wordnet
from rdflib import Graph, Literal, Namespace
from triplestore import TripleStore
def calculate_trash_score(obj):
    """Compute a WordNet-based trash score for the object label ``obj``.

    The label is looked up as the first noun sense (``<label>.n.01``) and
    compared, via ``wup_similarity``, with the 'inside' and 'trash' synsets
    (which raise the score) and the 'outside' synset (which lowers it,
    weighted twice).

    Args:
        obj: Object label. The compound labels ``garbage_bag``,
            ``plastic_bottle`` and ``broken_glass`` are replaced by
            ``bag``, ``bottle`` and ``glass`` before the lookup.

    Returns:
        ``(inside similarity + trash similarity) - 2 * outside similarity``.
    """
    # Compound labels are swapped for a single noun before the synset lookup.
    single_noun = {
        "garbage_bag": "bag",
        "plastic_bottle": "bottle",
        "broken_glass": "glass",
    }
    subject = wordnet.synset(single_noun.get(obj, obj) + '.n.01')

    # the higher the relatedness, the more likely the object is trash
    supporting = [wordnet.synset(name) for name in ('inside.n.01', 'trash.n.01')]
    inside_sim, trash_sim = [subject.wup_similarity(ref) for ref in supporting]

    # the higher the relatedness, the less likely the object is trash
    opposing = wordnet.synset('outside.n.01')
    outside_sim = subject.wup_similarity(opposing)

    return (inside_sim + trash_sim) - 2 * outside_sim
def get_trash_score(obj):
    """Return the stored trash score for ``obj``, computing it when missing.

    The triple store is queried for ``(wn:<obj>, wn:trash_score, ?)``. If the
    returned graph is empty, the score is calculated with
    ``calculate_trash_score``, added to the graph as a literal, and the graph
    is uploaded so that later queries can find it.

    Args:
        obj: Object label; used as the local name of the subject in the
            ``http://wordnet.princeton.edu/`` namespace.

    Returns:
        The object of the first matching triple in the graph (an RDF term,
        not a plain float), or ``None`` if the graph yields no match.
    """
    wordnet_ns = Namespace('http://wordnet.princeton.edu/')
    subject = wordnet_ns[obj]
    predicate = wordnet_ns['trash_score']
    graph = TripleStore.get_triples_as_graph(s=subject, p=predicate)

    if not graph:
        # no score known yet: compute it and store it in the graph
        computed = Literal(calculate_trash_score(obj))
        graph.add((subject, predicate, computed))
        graph.bind('wn', wordnet_ns)
        # upload for future queries
        TripleStore.upload_triples_as_graph(graph)

    # only the first result is used
    matches = graph.triples((subject, predicate, None))
    return next((value for _, _, value in matches), None)
if __name__ == "__main__":
    # Score every object listed in the test data, classify it as trash when
    # its score exceeds the threshold, and print a table comparing the
    # classification with the label from the 'trash' column.
    test_data = pd.read_csv("test-data.csv")
    threshold = Literal(-0.1)  # scores strictly above this count as trash

    results = []
    for _, record in test_data.iterrows():
        obj, trash = record['object'], record['trash']
        score = get_trash_score(obj)
        class_as_trash = int(score > threshold)
        is_correct = int(trash == class_as_trash)
        results.append({
            "object": obj,
            "trash": trash,
            "score": score,
            "classified_as_trash": class_as_trash,
            "correct": is_correct,
        })

    print(pd.DataFrame(results))