-
Notifications
You must be signed in to change notification settings - Fork 4
/
senticr.py
113 lines (110 loc) · 2.86 KB
/
senticr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import openpyxl
import string
# Compare a sentiment tool's labels with the reference labels stored in an
# Excel sheet: build the confusion matrix, print precision / recall /
# F-measure for one target label, then print the linearly weighted kappa.

DATA_DIR = "C://Users/Nasif/Desktop/GitHubdataProject/710results"
WORKBOOK_NAME = 'sentiment_firstset_data.xlsx'

RATES = ("negative", "neutral", "positive")
TARGET_LABEL = 'neutral'

# Sheet layout: one reference column, one column per evaluated tool.
# NOTE(review): column B is treated as the reference (ground-truth) rating
# because the script counts "false positives" relative to it - confirm.
REFERENCE_COLUMN = 'B'
TOOLS = ["sentiCR"]
TOOL_COLUMNS = ["H"]
FIRST_ROW = 2
LAST_ROW = 590

# Reference values that exclude a row from the evaluation entirely.
SKIPPED_REFERENCE_LABELS = ('', 'sarcasm')
# Tool outputs that mean "no usable label"; such rows are counted as errors.
TOOL_FAILURES = ("not enough data", "problem")
# Short tool labels mapped onto the names used in RATES.
TOOL_ALIASES = {'neg': 'negative', 'pos': 'positive'}


def read_label_pairs(ws, reference_column, tool_column, first_row, last_row):
    """Yield ``(reference, tool)`` raw cell values for each sheet row.

    ``ws`` only needs to support ``ws["B2"].value`` style access; rows
    ``first_row`` through ``last_row`` (inclusive) are read.
    """
    for row in range(first_row, last_row + 1):
        yield (ws[reference_column + str(row)].value,
               ws[tool_column + str(row)].value)


def tally_ratings(pairs, rates=RATES):
    """Count ``(reference, tool)`` label combinations.

    Returns ``(bucket, tool_errors)``.  ``bucket`` maps every combination of
    ``rates`` to its count (zero when unseen); ``tool_errors`` is the number
    of rows dropped because the tool produced no usable label.

    Rows whose reference cell is empty or marked ``'sarcasm'`` are ignored.
    A label outside ``rates`` gets its own bucket entry instead of raising
    ``KeyError``, so the caller's size check on ``bucket`` can report it.
    """
    bucket = {(ref, out): 0 for ref in rates for out in rates}
    tool_errors = 0
    for reference, predicted in pairs:
        if reference is None:
            continue
        # Strip before comparing so padded cells such as ' sarcasm' are
        # recognised as well.
        reference = reference.strip()
        if reference in SKIPPED_REFERENCE_LABELS:
            continue
        if predicted is not None:
            predicted = predicted.strip()
        # An empty tool cell is handled like an explicit failure marker.
        if predicted is None or predicted in TOOL_FAILURES:
            tool_errors += 1
            continue
        predicted = TOOL_ALIASES.get(predicted, predicted)
        key = (reference, predicted)
        bucket[key] = bucket.get(key, 0) + 1
    return bucket, tool_errors


def precision_recall_f(bucket, label):
    """Return ``(precision, recall, f_measure)`` for ``label`` as percentages.

    precision = hits / rows the tool labelled ``label``
    recall    = hits / rows whose reference label is ``label``

    The earlier version of this script had the two quotients swapped.  A
    metric whose denominator is zero is reported as ``0.0``.
    """
    true_pos = bucket.get((label, label), 0)
    reference_total = sum(n for (ref, _), n in bucket.items() if ref == label)
    predicted_total = sum(n for (_, out), n in bucket.items() if out == label)

    precision = (true_pos / predicted_total) * 100 if predicted_total else 0.0
    recall = (true_pos / reference_total) * 100 if reference_total else 0.0
    if precision + recall:
        f_measure = (2 * (precision * recall)) / (precision + recall)
    else:
        f_measure = 0.0
    return precision, recall, f_measure


def disagreement_weight(reference, predicted):
    """Return 0 for agreement, 1 if one side is neutral, otherwise 2."""
    if reference == predicted:
        return 0
    if "neutral" in (reference, predicted):
        return 1
    return 2


def weighted_kappa(bucket):
    """Return the weighted kappa of the confusion counts in ``bucket``.

    Computed as ``1 - observed / expected`` where both terms are sums of
    cell values multiplied by ``disagreement_weight``; the expected value of
    a cell is ``row_total * column_total / grand_total``.  Returns NaN when
    the statistic is undefined (no rows, or zero expected disagreement).
    """
    total = sum(bucket.values())
    if not total:
        return float("nan")

    # Marginal totals computed once instead of once per cell.
    row_totals = {}
    col_totals = {}
    for (ref, out), count in bucket.items():
        row_totals[ref] = row_totals.get(ref, 0) + count
        col_totals[out] = col_totals.get(out, 0) + count

    observation_sum = sum(
        disagreement_weight(ref, out) * count
        for (ref, out), count in bucket.items()
    )
    expectation_sum = sum(
        disagreement_weight(ref, out)
        * ((col_totals[out] * row_totals[ref]) / total)
        for (ref, out) in bucket
    )
    if not expectation_sum:
        return float("nan")
    return 1 - (observation_sum / expectation_sum)


def main():
    """Load the workbook and print the metrics for every configured tool."""
    os.chdir(DATA_DIR)
    wb = openpyxl.load_workbook(WORKBOOK_NAME)
    ws = wb.active

    for tool_name, tool_column in zip(TOOLS, TOOL_COLUMNS):
        # Counts are rebuilt per tool so results never leak between tools.
        pairs = read_label_pairs(
            ws, REFERENCE_COLUMN, tool_column, FIRST_ROW, LAST_ROW
        )
        bucket, _tool_errors = tally_ratings(pairs)

        precision, recall, f_measure = precision_recall_f(bucket, TARGET_LABEL)
        print(tool_name, precision, recall, f_measure)

        # More cells than RATES x RATES means an unexpected label was seen.
        if len(bucket) != len(RATES) ** 2:
            print("alert! alert! alert!")
            print(bucket)

        agreement = sum(n for (ref, out), n in bucket.items() if ref == out)
        total = sum(bucket.values())
        # Distinct labels in first-seen order (reference before tool label).
        values = list(dict.fromkeys(lbl for pair in bucket for lbl in pair))
        print(agreement, total, values)

        print(weighted_kappa(bucket))


if __name__ == "__main__":
    main()