forked from maciejkula/glove-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
glovePython.py
28 lines (23 loc) · 874 Bytes
/
glovePython.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from glove import Glove, Corpus
# Text file handed to get_text() by the (currently commented-out)
# corpus-building step further down in this script.
inputFile = "/media/charles/data/nlp/zzz1000"
# Saved Corpus object that the script reloads with Corpus.load(); its
# .matrix and .dictionary attributes feed the Glove model.
corpusModelFile = "/media/charles/data/nlp/corpus_wiki.model"
# Destination passed to glove.save() once fitting has finished.
outputFile = "/media/charles/data/nlp/glove_wiki.model"
# Forwarded to glove.fit() as its `epochs` argument.
epochs = 10
# Forwarded to glove.fit() as its `no_threads` argument.
nb_threads = 4
def get_text(fin):
    """Lazily yield the tokens of each line of a text file.

    Args:
        fin: Path of the text file to read.

    Yields:
        list[str]: The line, without its line terminator, split on single
        space characters. Consecutive spaces therefore produce empty-string
        tokens, and an empty line yields [''].
    """
    # The context manager guarantees the handle is released once the
    # generator is exhausted or closed; a bare open() would leak it.
    with open(fin) as f:
        for line in f:
            # Strip only the newline. Slicing with [:-1] would also chop the
            # last real character of a final line lacking a trailing newline.
            yield line.rstrip('\n').split(' ')
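# One-time preprocessing step, currently disabled: builds the co-occurrence
# matrix from inputFile and saves it to corpusModelFile. Uncomment to rebuild.
#corpus_model = Corpus()
#print("computing co-occurrence matrix...")
#corpus_model.fit(get_text(inputFile), window=10)
#print("saving co-occurrence matrix...")
#corpus_model.save(corpusModelFile)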
# Reload the previously saved corpus instead of recomputing it.
corpus_model = Corpus.load(corpusModelFile)

# Fit the GloVe model on the corpus matrix.
print("fitting model...")
glove = Glove(no_components=200, learning_rate=0.05)
glove.fit(
    corpus_model.matrix,
    epochs=epochs,
    no_threads=nb_threads,
    verbose=True,
)

# Attach the corpus dictionary to the fitted model before persisting it.
glove.add_dictionary(corpus_model.dictionary)

print("saving model to " + outputFile + " ...")
glove.save(outputFile)