-
Notifications
You must be signed in to change notification settings - Fork 0
/
formembedding.py
103 lines (76 loc) · 2.24 KB
/
formembedding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import numpy as np
# Load the pre-trained 100-dimensional GloVe Twitter vectors into `glove`,
# a dict mapping each token to the list of its vector components. The
# components are kept as the raw strings read from the file; whoever looks a
# token up is responsible for converting them to floats.
dimension = 100
glove = dict()
# `with` guarantees the file is closed even if reading fails part-way through.
with open('/home/sasank/nlp/glove.twitter.27B/glove.twitter.27B.100d.txt') as fp:
    for i, line in enumerate(fp, 1):
        # Each line is "<token> <v1> <v2> ... <vN>", separated by single spaces.
        values = line.strip('\n').split(' ')
        glove[values[0]] = values[1:]
        # Progress indicator: report every 100000 lines read.
        if i % 100000 == 0:
            print(i)
def _embed_tweets(inp, op):
    """Build one embedding row per line of `inp` and save them to `op`.

    Each input line is split on tabs and only the first field (the tweet
    text) is used. The text is split on single spaces, every word found in
    the module-level `glove` dict contributes its vector to a running sum,
    and the sum is then divided by its own largest component. Lines with no
    known word keep an all-zero vector, so the saved rows stay aligned
    one-to-one with the input lines.

    Parameters:
        inp: path of the tab-separated input file.
        op: path of the .npy file written with `np.save`.
    """
    out = []
    # `with` closes the input file even if a line fails to parse.
    with open(inp) as fp:
        for line in fp:
            text = line.split('\t')[0]
            allsum = np.zeros((dimension,), dtype="float32")
            matched = False
            for word in text.split(' '):
                x = glove.get(word)
                if x is None:
                    continue
                # GloVe components are stored as strings; convert on use.
                allsum = np.add(allsum, [float(v) for v in x])
                matched = True
            if matched:
                # NOTE(review): this scales by the largest component, not by
                # the number of matched words. A zero or negative maximum
                # yields inf/NaN or flips signs -- confirm that max-scaling
                # (rather than averaging) is the intended normalisation.
                allsum = np.divide(allsum, np.max(allsum))
            out.append(allsum)
    np.save(op, out)


# Produce <emotion>_<split>_100.npy for the train and test file of every
# emotion; the train/test processing is identical, so it shares one helper.
for emotion in ['anger', 'joy', 'sadness', 'fear']:
    for split in ['train', 'test']:
        _embed_tweets('Tweetsdata/' + emotion + '_' + split + '_pproc.txt',
                      emotion + '_' + split + '_100' + '.npy')
        print(emotion + split + ' done')
# Disabled code: the triple-quoted string below is a bare expression
# statement, so nothing inside it is ever executed. The snippet reads the
# second tab-separated field of the anger train/test files as float scores,
# fits a model on X_train, predicts on X_test and prints the Pearson and
# Spearman correlations between the predictions and the test scores.
# NOTE(review): X_train, X_test, LogisticRegression and scipy are not defined
# or imported in the visible part of this file, and `ml_model` is bound to
# the name LogisticRegression without calling it (presumably a class that
# should be instantiated) -- all of this needs fixing before re-enabling.
'''
score_train=list()
fp=open('Tweetsdata/anger_train_pproc.txt')
for line in fp:
values= line.split('\t')
score_train.append(float(values[1]))
fp.close()
score_gold=list()
fp=open('Tweetsdata/anger_test_pproc.txt')
for line in fp:
values= line.split('\t')
score_gold.append(float(values[1]))
fp.close()
ml_model = LogisticRegression
print X_train
ml_model.fit(X_train, score_train)
y_pred = ml_model.predict(X_test)
#score = evaluate_lists(y_pred, score_gold)
print y_pred,score_gold
pears_corr = scipy.stats.pearsonr(y_pred, score_gold)[0]
spear_corr = scipy.stats.spearmanr(y_pred, score_gold)[0]
print pears_corr,spear_corr
'''