-
Notifications
You must be signed in to change notification settings - Fork 0
/
csv2bin.py
81 lines (69 loc) · 2.3 KB
/
csv2bin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Read the FER2013 CSV file and write its records out as CIFAR-10-style binary files.
import tensorflow as tf
import numpy as np
DATA_PATH = './fer2013.csv'
def read_csv():
    """Build the graph ops that stream rows out of the CSV file at DATA_PATH.

    Returns:
        A ``(label, data)`` pair of tensors taken from the first and second
        CSV columns respectively. The first column is decoded with an
        integer default and the second with a string default.
    """
    # One default per CSV column; the defaults also fix each column's type.
    # A pre-split CSV without the trailing usage column ('Training',
    # 'PublicTest', 'PrivateTest') would need [[1], ['']] instead.
    column_defaults = [[1], [''], ['']]

    path_queue = tf.train.string_input_producer([DATA_PATH])

    # skip_header_lines=1 drops the first line of every file in the queue.
    line_reader = tf.TextLineReader(skip_header_lines=1)
    _, line = line_reader.read(path_queue)

    columns = tf.decode_csv(
        line, record_defaults=column_defaults, field_delim=',')
    label, data = columns[0], columns[1]
    return label, data
# Number of rows in each of the three consecutive parts of the CSV
# (training, public test, private test), in file order.
NUM_TRAINS = 28709
NUM_PUBTST = 3589
NUM_PRITST = 3589

# Output binary file written for each part.
TRAIN_FILE = 'fer2013train.bin'
PUBTST_FILE = 'fer2013pubtst.bin'
PRITST_FILE = 'fer2013pritst.bin'
def _encode_record(label, pixels):
    """Pack one CSV row into raw bytes: the label byte, then the pixel bytes.

    Args:
        label: integer class label of the row.
        pixels: whitespace-separated decimal values, as ``str`` or ``bytes``.

    Returns:
        A ``bytearray`` holding one byte per value, label first.

    Raises:
        ValueError: if a token is not an integer literal or a value does
            not fit in a single byte (0..255).
    """
    # int() parses both str and bytes tokens, so the bytes objects that
    # sess.run returns for string tensors on Python 3 need no decoding.
    values = [int(label)]
    values.extend(int(token) for token in pixels.split())
    return bytearray(values)


def _write_split(sess, label, data, out_file, num_records):
    """Read ``num_records`` rows from the CSV reader and append them to ``out_file``.

    Args:
        sess: the active ``tf.Session``.
        label: label tensor returned by ``read_csv``.
        data: pixel-string tensor returned by ``read_csv``.
        out_file: binary file object opened for writing.
        num_records: number of rows to consume.

    Returns:
        Total number of bytes written to ``out_file``.
    """
    num_bytes = 0
    # Exactly num_records reads: the reader already skips the CSV header,
    # so reading one extra row would pull in the first row of the next part.
    for _ in range(num_records):
        pixels, lbl = sess.run([data, label])
        record = _encode_record(lbl, pixels)
        out_file.write(record)
        num_bytes += len(record)
    return num_bytes


def main():
    """Split the CSV into train / public-test / private-test binary files.

    Rows are consumed in file order: the first NUM_TRAINS go to TRAIN_FILE,
    the next NUM_PUBTST to PUBTST_FILE and the next NUM_PRITST to
    PRITST_FILE. Each row is stored as one label byte followed by one byte
    per pixel value. The byte count of every output file is printed.
    """
    # Open all outputs up front (as the script always has) and let the
    # context manager close them even when a read or conversion fails.
    with open(TRAIN_FILE, "wb") as train_file, \
            open(PUBTST_FILE, "wb") as pubtst_file, \
            open(PRITST_FILE, "wb") as pritst_file:
        # (progress message, destination file, number of rows) in CSV order.
        splits = (
            ('train_file num_bytes = ', train_file, NUM_TRAINS),
            ('pub test file num_bytes = ', pubtst_file, NUM_PUBTST),
            ('priv test file num_bytes = ', pritst_file, NUM_PRITST),
        )

        with tf.Session() as sess:
            label, data = read_csv()

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            try:
                for message, out_file, num_records in splits:
                    num_bytes = _write_split(
                        sess, label, data, out_file, num_records)
                    print(message, num_bytes)
            finally:
                # Always stop and join the queue-runner threads, even when
                # a split fails part-way through.
                coord.request_stop()
                coord.join(threads)


if __name__ == '__main__':
    main()