-- lm.lua (forked from OpenNMT/OpenNMT)
require('onmt.init')
-- Command-line parser shared by the option declarations below and by main().
local cmd = onmt.utils.ExtendedCmdLine.new('lm.lua')

-- Entries of the 'Data' option group, appended one at a time.
local options = {}

-- Run mode, restricted to 'score' or 'sample'. Declared without a leading
-- dash, unlike the other entries (presumably a positional argument; confirm
-- against ExtendedCmdLine).
options[#options + 1] = {
  'mode', 'string',
  [['score' apply lm to input text, 'sample' samples output based on input text.]],
  {
    enum = { 'score', 'sample' }
  }
}

-- Input sequences; validated with ExtendedCmdLine.nonEmpty.
options[#options + 1] = {
  '-src', '',
  [[Source sequences to sample/score.]],
  {
    valid = onmt.utils.ExtendedCmdLine.nonEmpty
  }
}

-- Destination file for the results; defaults to 'output.txt'.
options[#options + 1] = {
  '-output', 'output.txt',
  [[Output file depend on `<mode>`.]]
}

cmd:setCmdLineOptions(options, 'Data')

-- Each component registers its own options on the shared parser.
onmt.lm.LM.declareOpts(cmd)
onmt.utils.Cuda.declareOpts(cmd)
onmt.utils.Logger.declareOpts(cmd)

-- "Other options" heading, surrounded by blank lines in the help text.
for _, line in ipairs({ '', 'Other options', '' }) do
  cmd:text(line)
end

cmd:option('-time', false, [[Measure average translation time.]])
--- Entry point: applies the language model to every sequence read from `-src`.
--
-- Sequences are read one at a time, converted with `lm:buildInput` and grouped
-- into batches of `opt.batch_size`. Each batch is passed to `lm:evaluate` when
-- the mode is 'score' and to `lm:sample` otherwise. Every result is logged and
-- written to `opt.output`, one per line. With `-time`, only the model calls
-- are timed and per-sentence averages are logged at the end.
--
-- `batch_size`, `max_length` and `temperature` are not declared in this file;
-- presumably they come from `onmt.lm.LM.declareOpts` (to be confirmed).
--
-- Raises an error if the output file cannot be opened for writing.
local function main()
  local opt = cmd:parse(arg)

  _G.logger = onmt.utils.Logger.new(opt.log_file, opt.disable_logs, opt.log_level, opt.log_tag)
  onmt.utils.Cuda.init(opt)

  local lm = onmt.lm.LM.new(opt)
  local srcReader = onmt.utils.FileReader.new(opt.src)
  local srcBatch = {}

  -- io.open returns nil plus a message on failure; assert turns that into an
  -- explicit error here instead of a nil-index failure at the first write.
  local outFile = assert(io.open(opt.output, 'w'))

  -- 1-based id of the next result to emit; sentId - 1 is the number emitted so far.
  local sentId = 1

  local timer
  if opt.time then
    -- Start from a stopped, zeroed timer so that only model calls are measured.
    timer = torch.Timer()
    timer:stop()
    timer:reset()
  end

  while true do
    local srcTokens = srcReader:next()

    if srcTokens ~= nil then
      table.insert(srcBatch, lm:buildInput(srcTokens))
    elseif #srcBatch == 0 then
      -- End of input with nothing pending.
      break
    end

    -- Run the model when the batch is full, or to flush a partial batch at end of input.
    if srcTokens == nil or #srcBatch == opt.batch_size then
      if opt.time then
        timer:resume()
      end

      local results
      if opt.mode == 'score' then
        results = lm:evaluate(srcBatch)
      else
        results = lm:sample(srcBatch, opt.max_length, opt.temperature)
      end

      if opt.time then
        timer:stop()
      end

      for b = 1, #results do
        _G.logger:info('SENT %d: %s', sentId, results[b])
        outFile:write(results[b] .. '\n')
        sentId = sentId + 1
      end

      if srcTokens == nil then
        break
      end

      srcBatch = {}
      collectgarbage()
    end
  end

  -- Close explicitly so buffered results are flushed before the process ends.
  outFile:close()

  if opt.time then
    local sentenceCount = sentId - 1
    if sentenceCount > 0 then
      local time = timer:time()
      _G.logger:info("Average sentence processing time (in seconds):\n")
      _G.logger:info("avg real\t" .. time.real / sentenceCount .. "\n")
      _G.logger:info("avg user\t" .. time.user / sentenceCount .. "\n")
      _G.logger:info("avg sys\t" .. time.sys / sentenceCount .. "\n")
    else
      -- Dividing by zero would log meaningless nan averages.
      _G.logger:info("No sentence was processed: average processing time is not available.")
    end
  end

  _G.logger:shutDown()
end

main()