h5py_init.py
import os
import glob
import h5py
import numpy as np
import cv2 #Required by create_img_data_set and flip_windowed_arr
from util import *
import sys
'''
Note: these functions will not work without setting up the directories of video frames as shown in get_dir_lists.
Alternatively, contact me to get access to the final h5py datasets, which this code produces.
'''
root_drive = '.' #Current dir for now
#if not os.path.isdir(root_drive):
# print('Using Sharcnet equivalent of root_drive')
# root_drive = '/home/jjniatsl/project/jjniatsl/Fall-Data'
def get_dir_lists(dset):
    '''
    Shows the directory structure the video frames must be in.

    Params:
        str dset: dataset to be loaded
    Returns:
        paths to ADL and Fall videos
    '''
    #----------Use these for N: drive located Fall-Data
    #root_drive = 'N:/FallDetection/Jacob/Fall-Data/' #Put path to video frames

    path_Fall = root_drive + '/Fall-Data/{}/Fall/Fall*'.format(dset)
    path_ADL = root_drive + '/Fall-Data/{}/NonFall/ADL*'.format(dset)

    if dset == 'Thermal-Dummy':
        path_Fall = root_drive + '/Fall-Data/Thermal-Dummy/Fall/Fall*'
        path_ADL = root_drive + '/Fall-Data/Thermal-Dummy/NonFall/ADL*'

    elif dset == 'Thermal':
        path_Fall = root_drive + '/Fall-Data/Thermal/Fall/Fall*'
        path_ADL = root_drive + '/Fall-Data/Thermal/NonFall/ADL*'

    elif dset == 'UR':
        path_Fall = root_drive + '/Fall-Data/UR_Kinect/Fall/original/Fall*'
        path_ADL = root_drive + '/Fall-Data/UR_Kinect/NonFall/original/adl*'

    elif dset == 'UR-Filled':
        path_Fall = root_drive + '/Fall-Data/UR_Kinect/Fall/filled/Fall*'
        path_ADL = root_drive + '/Fall-Data/UR_Kinect/NonFall/filled/adl*'

    elif dset == 'SDU':
        path_Fall = root_drive + '/Fall-Data/SDUFall/Fall/Fall*/Depth'
        path_ADL = root_drive + '/Fall-Data/SDUFall/NonFall/ADL*/Depth'

    elif dset == 'SDU-Filled':
        path_Fall = root_drive + '/Fall-Data/SDUFall/Fall/Fall*/Filled'
        path_ADL = root_drive + '/Fall-Data/SDUFall/NonFall/ADL*/Filled'

    print(path_Fall, path_ADL)

    vid_dir_list_Fall = glob.glob(path_Fall)
    vid_dir_list_ADL = glob.glob(path_ADL)

    if len(vid_dir_list_Fall) == 0:
        print('no Fall vids found')

    if len(vid_dir_list_ADL) == 0:
        print('no ADL vids found')

    return vid_dir_list_ADL, vid_dir_list_Fall
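
# For reference, a hypothetical frame layout that would match the Thermal globs above
# (the frame file names are illustrative; any .png/.jpg names work as long as sort_frames
# can parse the trailing frame index):
#
#   ./Fall-Data/Thermal/Fall/Fall1/Fall1-0001.png
#   ./Fall-Data/Thermal/Fall/Fall1/Fall1-0002.png
#   ./Fall-Data/Thermal/NonFall/ADL1/ADL1-0001.png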
def init_videos(img_width = 64, img_height = 64, \
        raw = False, dset = 'Thermal'):
    '''
    Creates or overwrites the h5py group corresponding to root_path (see body), in the h5py file located at
    root_drive + '/H5Data/Data_set-{}-imgdim{}x{}.h5'.format(dset, img_width, img_height).

    The h5py group of nested groups is structured as follows:

    Processed (or Raw)
        Split_by_video
            ADL1
                Data
                    <HDF5 dataset "Data": shape (1397, 4096), type "<f8">
                Labels
                    <HDF5 dataset "Labels": shape (1397,), type "<i4">
            ADL2
                Data
                    <HDF5 dataset "Data": shape (3203, 4096), type "<f8">
                Labels
                    <HDF5 dataset "Labels": shape (3203,), type "<i4">
            .
            .
            .
            ADL{N}
                Data
                    <HDF5 dataset "Data": shape (3203, 4096), type "<f8">
                Labels
                    <HDF5 dataset "Labels": shape (3203,), type "<i4">
            Fall1
                Data
                    <HDF5 dataset "Data": shape (49, 4096), type "<f8">
                Labels
                    <HDF5 dataset "Labels": shape (49,), type "<i4">
            .
            .
            .
            Fall{M}
                Data
                    <HDF5 dataset "Data": shape (49, 4096), type "<f8">
                Labels
                    <HDF5 dataset "Labels": shape (49,), type "<i4">

    where N is the number of ADL videos, and M is the number of Fall videos.

    Params:
        bool raw: if True, data is not processed (no mean centering or intensity scaling)
        int img_width: width of images
        int img_height: height of images
        str dset: dataset to be loaded
    '''
    path = root_drive + '/H5Data/Data_set-{}-imgdim{}x{}.h5'.format(dset, img_width, img_height)
    #print(root_drive + '/H5Data/{}/'.format(dset))

    vid_dir_list_0, vid_dir_list_1 = get_dir_lists(dset)

    if len(vid_dir_list_0) == 0 and len(vid_dir_list_1) == 0:
        print('no videos found, make sure video files are placed in the Fall-Data folder, terminating...')
        sys.exit()

    if raw == False:
        root_path = dset + '/Processed/Split_by_video'
    else:
        root_path = dset + '/Raw/Split_by_video'

    print('creating data at root_path', root_path)

    def init_videos_helper(root_path): #Nested to keep scope
        with h5py.File(path, 'a') as hf:
            #root_sub = root.create_group('Split_by_video')
            root = hf.create_group(root_path)

            for vid_dir in vid_dir_list_1:
                init_vid(vid_dir = vid_dir, vid_class = 1, img_width = img_width, img_height = img_height,\
                    hf = root, raw = raw, dset = dset)

            for vid_dir in vid_dir_list_0:
                init_vid(vid_dir = vid_dir, vid_class = 0, img_width = img_width, img_height = img_height, \
                    hf = root, raw = raw, dset = dset)

    if os.path.isfile(path):
        hf = h5py.File(path, 'a')
        if root_path in hf:
            print('video h5py file exists, deleting old group {}, creating new'.format(root_path))
            del hf[root_path]
            hf.close()
            init_videos_helper(root_path)
        else:
            print('File exists, but no group for this data set; initializing...')
            hf.close()
            init_videos_helper(root_path)
    else: #not initialized
        print('No data file exists yet; initializing')
        init_videos_helper(root_path)
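
# Example call (a sketch; assumes the Thermal frames are already laid out under ./Fall-Data
# as described in get_dir_lists):
#
#   init_videos(img_width = 64, img_height = 64, raw = False, dset = 'Thermal')
#
# This writes the group 'Thermal/Processed/Split_by_video' into
# './H5Data/Data_set-Thermal-imgdim64x64.h5'.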
def init_vid(vid_dir = None, vid_class = None, img_width = 32, img_height = 32,\
        hf = None, raw = False, dset = 'Thermal'):
    '''
    Helper function for init_videos. Initializes a single video.

    Params:
        str vid_dir: path to the directory of frames to be initialized
        int vid_class: 1 for Fall, 0 for NonFall
        h5py.Group hf: group within which the new group is nested
    '''
    print('initializing vid at', vid_dir)

    data = create_img_data_set(fpath = vid_dir, ht = img_height, wd = img_width, raw = raw, sort = True, dset = dset)
    labels = np.zeros(len(data))

    if dset == 'SDU' or dset == 'SDU-Filled':
        vid_dir_name = os.path.basename(os.path.dirname(vid_dir))
    else:
        vid_dir_name = os.path.basename(vid_dir)

    print('vid_dir_name', vid_dir_name)

    grp = hf.create_group(vid_dir_name)

    if vid_dir_name in ['Fall' + str(i) for i in range(201)]: #201 is max fall index across all vids
        print('setting fall start')
        Fall_start, Fall_stop = get_fall_indeces(vid_dir_name, dset)
        labels[Fall_start:Fall_stop + 1] = 1

    grp['Labels'] = labels
    grp['Data'] = data
def get_fall_indeces(Fall_name, dset):
    root_dir = './Fall-Data/'
    labels_dir = root_dir + '/{}/Labels.csv'.format(dset)
    #print(labels_dir)
    import pandas as pd

    my_data = pd.read_csv(labels_dir, sep=',', header = 0, index_col = 0)
    start, stop = my_data.loc[Fall_name]
    print('start,stop', start, stop)
    #print(my_data)

    return start, stop
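
# get_fall_indeces assumes a Labels.csv of roughly the following form (column names and
# values here are illustrative): the fall video name is the index column and the remaining
# two columns are the first and last frame of the fall, so my_data.loc['Fall1'] unpacks
# to (start, stop):
#
#   Video,Start,Stop
#   Fall1,50,120
#   Fall2,35,80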
def sort_frames(frames, dset):
    #Sorting, trying different dataset string formats
    if dset == 'SDU' or dset == 'SDU-Filled': #TODO remove try/except, failing to sort should stop!
        print('sorting SDU frames...')
        #try:
        frames = sorted(frames, key = lambda x: int(os.path.basename(x).split('.')[0])) #SDU
        # except ValueError:
        #     print('failed to sort SDU vid frames')
        #     pass

    elif dset == 'UR' or dset == 'UR-Filled' or dset == 'Thermal':
        print('sorting UR or Thermal frames...')
        try:
            frames = sorted(frames, key = lambda x: int(x.split('-')[-1].split('.')[0]))
        except ValueError:
            print('failed to sort UR vid frames')
            pass #Keep unsorted frames rather than returning None

    elif dset == 'TST':
        try:
            frames = sorted(frames, key = lambda x: int(x.split('_')[-1].split('.')[0]))
        except ValueError:
            print('failed to sort vid frames, trying again....')
            pass

    elif dset == 'FallFree' or dset == 'FallFree-Filled':
        try:
            frames = sorted(frames, key = lambda x: int(x.split('_')[2]))
        except ValueError:
            print('failed to sort vid frames, trying again....')
            pass

    return frames
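
# For reference, the frame-name patterns the branches above expect (names are illustrative):
#   SDU / SDU-Filled:         '000123.png'      -> sorted by int('000123')
#   UR / UR-Filled / Thermal: 'Fall1-0042.png'  -> sorted by int('0042')
#   TST:                      'frame_0042.png'  -> sorted by int('0042')
#   FallFree(-Filled):        'a_b_0042_c.png'  -> sorted by int of the third '_' field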
def create_img_data_set(fpath, ht = 64, wd = 64, raw = False, sort = True, dset = 'Thermal'):
    '''
    Creates a data set of all images located at fpath, optionally sorting them.

    Params:
        str fpath: path to images to be processed
        bool raw: if False, applies mean centering and rescaling
        bool sort: if True, sorts frames, i.e. keeps sequential order, which may be lost due to glob
        str dset: dataset

    Returns:
        ndarray data: numpy array of images at fpath, of shape (samples, ht, wd, 1),
        where samples is the number of images at fpath.
    '''
    #print('gathering data at', fpath)
    fpath = fpath.replace('\\', '/')
    # print(fpath+'/*.png')
    frames = glob.glob(fpath+'/*.jpg') + glob.glob(fpath+'/*.png')

    if sort == True:
        frames = sort_frames(frames, dset)
        #print("\n".join(frames)) #Use this to check if sorted

    data = np.zeros((len(frames), ht, wd, 1))

    for i, x in enumerate(frames):
        #print(x, i)
        img = cv2.imread(x, 0) #Load as grayscale (RGB to GS)
        #img = cv2.imread(x, -1) #Use this for loading as is (i.e. 16 bit needs this, else gets converted to 8)
        # print('img.shape', img.shape)
        img = cv2.resize(img, (wd, ht)) #Resize; cv2.resize expects (width, height)
        img = img.reshape(ht, wd, 1)

        if raw == False:
            #print('processing data')
            img = img - np.mean(img) #Mean centering
            img = img.astype('float32') / 255. #Rescaling

        data[i, :, :, :] = img

    # data = data.reshape((len(data), np.prod(data.shape[1:]))) #Flatten the images
    print('data.shape', data.shape)

    return data
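
# Example call (a sketch; the path is illustrative): load one video's frames as a
# (samples, 64, 64, 1) array with mean centering and rescaling applied:
#
#   arr = create_img_data_set('./Fall-Data/Thermal/Fall/Fall1', ht = 64, wd = 64,
#                             raw = False, sort = True, dset = 'Thermal')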
def init_data_by_class(vid_class = 'NonFall', dset = 'Thermal',\
        raw = False, img_width = 64, img_height = 64, use_cropped = False):
    '''
    Creates or overwrites the h5py group corresponding to root_path (see body), in the h5py file located at
    root_drive + '/H5Data/Data_set-{}-imgdim{}x{}.h5'.format(dset, img_width, img_height).
    Creates the following structure:

    Processed
        Split_by_class
            NonFall
                Data
                    <HDF5 dataset "Data": shape (22116, 4096), type "<f8">
                Labels
                    <HDF5 dataset "Labels": shape (22116,), type "<i4">
            Fall
                Data
                    <HDF5 dataset "Data": shape (22116, 4096), type "<f8">
                Labels
                    <HDF5 dataset "Labels": shape (22116,), type "<i4">
    '''
    ht, wd = img_height, img_width

    if dset == 'Thermal':
        if vid_class == 'NonFall':
            fpath = root_drive + '/Fall-Data/Thermal/{}/ADL*'.format(vid_class)
        elif vid_class == 'Fall':
            fpath = root_drive + '/Fall-Data/Thermal/{}/Fall*'.format(vid_class)
        else:
            print('invalid vid class')
            return

    elif dset == 'UR-Filled':
        if vid_class == 'NonFall':
            fpath = root_drive + '/Fall-Data/UR_Kinect/{}/filled/adl*'.format(vid_class)
        else:
            fpath = root_drive + '/Fall-Data/UR_Kinect/{}/filled/Fall*'.format(vid_class)

    elif dset == 'UR':
        if vid_class == 'NonFall':
            fpath = root_drive + '/Fall-Data/UR_Kinect/{}/original/adl*'.format(vid_class)
        else:
            fpath = root_drive + '/Fall-Data/UR_Kinect/{}/original/Fall*'.format(vid_class)

    elif dset == 'SDU':
        fpath = root_drive + '/Fall-Data/SDUFall/{}/ADL*/Depth'.format(vid_class)

    elif dset == 'SDU-Filled':
        fpath = root_drive + '/Fall-Data/SDUFall/{}/ADL*/Filled'.format(vid_class)

    data = create_img_data_set(fpath, ht, wd, raw, False) #Don't need to sort

    if data.shape[0] == 0:
        print('no data found, make sure video files are placed in Fall-Data folder, terminating')
        sys.exit()

    #path = './H5Data/Data_set_imgdim{}x{}.h5'.format(img_width, img_height) #Old
    #path = 'N:/FallDetection/Fall-Data/H5Data/Data_set_imgdim{}x{}.h5'.format(img_width, img_height) #Old
    path = root_drive + '/H5Data/Data_set-{}-imgdim{}x{}.h5'.format(dset, img_width, img_height)

    if raw == False:
        root_path = dset + '/Processed/Split_by_class/' + vid_class #root_path is for the h5py tree
    else:
        root_path = dset + '/Raw/Split_by_class/' + vid_class

    if vid_class == 'NonFall':
        labels = np.array([0] * len(data))
    else:
        labels = np.array([1] * len(data))

    with h5py.File(path, 'a') as hf:
        #root_sub = root.create_group('Split_by_video')
        print('creating data at ', root_path)
        if root_path in hf:
            print('root_path {} found, clearing'.format(root_path))
            del hf[root_path]

        root = hf.create_group(root_path)
        root['Data'] = data
        root['Labels'] = labels
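
# Example (a sketch): after init_data_by_class has been run for both classes, the arrays
# can be read back with standard h5py indexing:
#
#   with h5py.File('./H5Data/Data_set-Thermal-imgdim64x64.h5', 'r') as hf:
#       nonfall_data = hf['Thermal/Processed/Split_by_class/NonFall/Data'][:]
#       fall_labels = hf['Thermal/Processed/Split_by_class/Fall/Labels'][:]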
def flip_windowed_arr(windowed_data):
    """
    Horizontally flips each frame in each window.

    Params:
        windowed_data: array of shape (samples, win_len, ...)

    Returns:
        array of shape (len(windowed_data), win_len, flattened_dim)

    Note: Requires OpenCV.
    """
    win_len = windowed_data.shape[1]
    flattened_dim = np.prod(windowed_data.shape[2:])
    #print(flattened_dim)
    flipped_data_windowed = np.zeros((len(windowed_data), win_len, flattened_dim)) #Array of windows
    print(flipped_data_windowed.shape)

    for win_idx in range(len(windowed_data)):
        window = windowed_data[win_idx]
        flip_win = np.zeros((win_len, flattened_dim))

        for im_idx in range(len(window)):
            im = window[im_idx]
            hor_flip_im = cv2.flip(im, 1)
            #print(hor_flip_im.shape)
            #print(flip_win[im_idx].shape)
            flip_win[im_idx] = hor_flip_im.reshape(flattened_dim)

        flipped_data_windowed[win_idx] = flip_win

    return flipped_data_windowed
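

# A minimal driver, as a sketch. The dataset name and image size are assumptions; the
# corresponding frames must already exist under ./Fall-Data (see get_dir_lists) and, for
# the Fall videos, a Labels.csv must be present (see get_fall_indeces).
if __name__ == '__main__':
    init_videos(img_width = 64, img_height = 64, raw = False, dset = 'Thermal')
    init_data_by_class(vid_class = 'NonFall', dset = 'Thermal', raw = False,
                       img_width = 64, img_height = 64)
    init_data_by_class(vid_class = 'Fall', dset = 'Thermal', raw = False,
                       img_width = 64, img_height = 64)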