-
Notifications
You must be signed in to change notification settings - Fork 18
/
prepro_res.lua
170 lines (153 loc) · 4.95 KB
/
prepro_res.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
------------------------------------------------------------------------------
-- Hadamard Product for Low-rank Bilinear Pooling
-- Jin-Hwa Kim, Kyoung-Woon On, Woosang Lim, Jeonghee Kim, Jung-Woo Ha, Byoung-Tak Zhang
-- https://arxiv.org/abs/1610.04325
------------------------------------------------------------------------------
require 'nn'
require 'optim'
require 'torch'
require 'nn'
require 'math'
require 'cunn'
require 'cudnn'
require 'cutorch'
require 'image'
require 'hdf5'
cjson=require('cjson')
require 'xlua'
local t = require '../fb.resnet.torch/datasets/transforms'
-------------------------------------------------------------------------------
-- Input arguments and options
-------------------------------------------------------------------------------
-- Command-line interface. `cmd` (the parser) and `opt` (the parsed options)
-- are deliberately kept as globals, exactly as the rest of the script expects.
cmd = torch.CmdLine()
cmd:text()
cmd:text('Options')
do
   -- One entry per flag, registered in the order listed here.
   -- NOTE(review): `-backend` is parsed but not read anywhere else in the
   -- visible part of this file.
   local option_specs = {
      { flag = '-input_json', default = 'data_train-val_test-dev_2k/data_prepro.json',
        help = 'path to the json file containing vocab and answers' },
      { flag = '-image_root', default = '', help = 'path to the image root' },
      { flag = '-cnn_model', default = '', help = 'path to the cnn model' },
      { flag = '-batch_size', default = 10, help = 'batch_size' },
      { flag = '-l2norm', default = false, help = 'use L2-normalization' },
      { flag = '-out_path', default = '/data/vqa/features.h5', help = 'path to output features' },
      { flag = '-gpuid', default = 1, help = 'which gpu to use. -1 = use CPU' },
      { flag = '-backend', default = 'cudnn', help = 'nn|cudnn' },
   }
   for _, spec in ipairs(option_specs) do
      cmd:option(spec.flag, spec.default, spec.help)
   end
end
-- Parse the script arguments and echo the resulting option table.
opt = cmd:parse(arg)
print(opt)
-- Select the GPU and load the serialized CNN.
-- The script always runs on the GPU (input batches are allocated as
-- torch.CudaTensor further down), so a non-positive device id cannot work even
-- though the option help mentions -1. Fail early with a readable message
-- instead of an opaque cutorch error.
assert(type(opt.gpuid) == 'number' and opt.gpuid >= 1,
   string.format('-gpuid must be a GPU index >= 1 (CPU mode is not supported by this script), got %s',
      tostring(opt.gpuid)))
cutorch.setDevice(opt.gpuid)
assert(opt.cnn_model ~= '', 'missing -cnn_model: path to the cnn model')
net=torch.load(opt.cnn_model);
-- Remove the fully connected layer
do
   local last_type = torch.type(net:get(#net.modules))
   assert(last_type == 'nn.Linear',
      'expected the last module of the loaded model to be nn.Linear, got ' .. tostring(last_type))
end
-- Drop the last three modules (the nn.Linear checked above plus the two
-- modules in front of it): before collapse to get 2048x14x14
for _ = 1, 3 do
   net:remove(#net.modules)
end
-- NOTE(review): the 8/3/3 indices are tied to the layout of the specific
-- serialized model this script was written for; confirm before using another.
net:get(8):get(3):remove(3) -- remove relu
-- print(net)
-- Put the network in evaluation mode for feature extraction.
net:evaluate()
-- Input normalization statistics the model was trained with
-- (three values each for mean and std, one per image channel).
local meanstd = {}
meanstd.mean = { 0.485, 0.456, 0.406 }
meanstd.std = { 0.229, 0.224, 0.225 }
print('=== Double Sized Full Crop ===')
-- Preprocessing pipeline applied to every image by loadim():
-- scale, colour-normalize, then centre-crop, in that order.
local transform
do
   local side = 448
   local steps = {
      t.Scale(side),
      t.ColorNormalize(meanstd),
      t.CenterCrop(side),
   }
   transform = t.Compose(steps)
end
-- Small stateful image loader.
--   imloader.im   result of the last load, or the placeholder string "rip"
--                 when every decoder failed
--   imloader.err  error raised by the last decoder tried when the load
--                 failed, nil after a successful load
imloader={}

--- Try to decode `fname` with image.load, then image.loadPNG, then
-- image.loadJPG, stopping at the first decoder that does not raise.
-- The function never raises itself; callers inspect `self.im` / `self.err`.
-- @param fname path of the image file to load
function imloader:load(fname)
   self.im="rip"
   self.err=nil
   local decoders = { 'load', 'loadPNG', 'loadJPG' }
   for _, name in ipairs(decoders) do
      -- The decoder is looked up inside the protected call so that a missing
      -- decoder function counts as a failed attempt too.
      local ok, result = pcall(function () return image[name](fname) end)
      if ok then
         self.im = result
         self.err = nil
         return
      end
      -- Keep the most recent failure so callers can report why loading failed.
      self.err = result
   end
end
--- Load an image file and turn it into a single-image network input.
-- A 1-channel image is replicated to 3 channels; a 4-channel image keeps only
-- its first 3 channels (presumably dropping alpha - verify against the data).
-- The result is passed through the file-level `transform` and given a leading
-- dimension of size 1.
-- @param imname path of the image file
-- @return tensor with a leading mini-batch dimension of size 1
-- Raises an error naming the file when the image could not be decoded.
function loadim(imname)
   imloader:load(imname)
   local im = imloader.im
   -- imloader leaves a non-tensor placeholder behind when every decoder
   -- failed; report that clearly instead of failing on im:size() below.
   if not torch.isTensor(im) then
      error(string.format('could not load image: %s', tostring(imname)))
   end
   local channels = im:size(1)
   if channels == 1 then
      im = torch.cat({im, im, im}, 1)
   elseif channels == 4 then
      im = im[{{1,3},{},{}}]
   end
   -- Scale, normalize, and crop the image
   im = transform(im)
   -- View as mini-batch of size 1
   -- (table.unpack is missing on plain Lua 5.1 / LuaJIT without 5.2 compat)
   local unpack_fn = table.unpack or unpack
   im = im:view(1, unpack_fn(im:size():totable()))
   return im
end
-- Prefix prepended verbatim to every image name from the JSON file
-- (no path separator is inserted, so it must end with one if needed).
local image_root = opt.image_root
-- Read the complete JSON document. Reading with the default format would
-- return only the first line and break on pretty-printed files.
local file = assert(io.open(opt.input_json, 'r'))
local text = file:read('*a')
file:close()
json_file = cjson.decode(text)

--- Build the list of full image paths stored under `key` in the JSON file.
-- ipairs keeps the order of the JSON array, which pairs does not guarantee.
-- @param key name of a JSON field holding an array of image names
-- @return sequence of image_root .. name strings
local function collect_image_paths(key)
   local names = json_file[key]
   assert(type(names) == 'table',
      string.format("'%s' is missing from %s or is not an array", key, tostring(opt.input_json)))
   local list = {}
   for _, imname in ipairs(names) do
      list[#list + 1] = image_root .. imname
   end
   return list
end

local train_list = collect_image_paths('unique_img_train')
local test_list = collect_image_paths('unique_img_test')
-- Open the hdf5 output file only after the input has been read successfully,
-- so that a bad -input_json does not truncate an existing feature file.
local features = hdf5.open(opt.out_path, 'w')
local batch_size = opt.batch_size
-- A zero or negative step would make the batching loops below misbehave.
assert(type(batch_size) == 'number' and batch_size >= 1, '-batch_size must be >= 1')

--- L2-normalize a 4-D feature batch along its second dimension.
-- The second dimension is moved last, the tensor is flattened to a 2-D matrix,
-- each row is normalized with nn.Normalize(2), and the original layout is
-- restored. Sizes are read from the tensor itself rather than hard-coded.
-- @param feat 4-D CUDA tensor (the network output for one batch)
-- @return tensor with the same sizes as `feat`
local function l2_normalize_features(feat)
   local n, c, h, w = feat:size(1), feat:size(2), feat:size(3), feat:size(4)
   local l2normalizer = nn.Sequential()
      :add(nn.Transpose({2,3},{3,4}))
      :add(nn.Reshape(n*h*w, c, false))
      :add(nn.Normalize(2))
      :add(nn.Reshape(n, h, w, c, false))
      :add(nn.Transpose({3,4},{2,3}))
   l2normalizer = l2normalizer:cuda()
   return l2normalizer:forward(feat)
end

--- Run the network over every image in `image_list`, batch by batch, and
-- write one dataset per image to the open hdf5 file, keyed by the image's
-- base file name.
-- @param image_list sequence of image paths
local function extract_features(image_list)
   local sz = #image_list
   print(string.format('processing %d images...',sz))
   for first = 1, sz, batch_size do
      xlua.progress(first, sz)
      -- The final batch may hold fewer than batch_size images.
      local last = math.min(sz, first + batch_size - 1)
      local count = last - first + 1
      local ims = torch.CudaTensor(count, 3, 448, 448)
      for j = 1, count do
         ims[j] = loadim(image_list[first + j - 1]):cuda()
      end
      net:forward(ims)
      -- Clone so the features do not alias the network's output buffer.
      local feat = net.output:clone()
      if opt.l2norm then
         feat = l2_normalize_features(feat)
      end
      for j = 1, count do
         features:write(paths.basename(image_list[first + j - 1]), feat[j]:float())
      end
      collectgarbage()
   end
end

-- Process the train images, then the test images. The hdf5 file is closed even
-- when extraction fails part-way, and the original error (with traceback) is
-- re-raised afterwards.
local ok, trace = xpcall(function()
   extract_features(train_list)
   print('DataLoader loading h5 file: ', 'data_train')
   extract_features(test_list)
end, debug.traceback)
features:close()
if not ok then
   error(trace, 0)
end