-
Notifications
You must be signed in to change notification settings - Fork 0
/
digit_dataset.py
70 lines (59 loc) · 2.19 KB
/
digit_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import cv2
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
def make_dataset(path='data', skip0=True):
    """Load the per-digit image folders under *path* and split them 70/30.

    ``path`` must contain one sub-directory per digit, named ``'0'`` .. ``'9'``
    (``'0'`` is not required when ``skip0`` is true).  Every image is read as
    grayscale, histogram-equalized, inverted (``255 - image``), resized to
    32x32 and stored as a ``float32`` array of shape ``(1, 32, 32)``.

    Args:
        path: Root directory that holds the digit sub-directories.
        skip0: When true, only digits 1-9 are loaded and they are mapped to
            labels 0-8.  When false, digits 0-9 are loaded and mapped to
            labels 0-9.

    Returns:
        A ``(train, test)`` tuple of ``DigitDataset`` objects produced by
        ``train_test_split`` with ``test_size=0.3`` and ``random_state=17``.
    """
    # Labels must start at 0 in both modes.  The previous skip0=False branch
    # reused ``i - 1`` and therefore labelled digit 0 as -1.
    first_digit = 1 if skip0 else 0

    images = []
    labels = []
    skipped = 0
    for digit in range(first_digit, 10):
        digit_dir = os.path.join(path, str(digit))
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # seeded split below reproducible across machines and file systems.
        for file_name in sorted(os.listdir(digit_dir)):
            image = cv2.imread(os.path.join(digit_dir, file_name), cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals an unreadable / non-image file by
                # returning None instead of raising; skip it rather than
                # crash inside equalizeHist.
                skipped += 1
                continue
            image = cv2.equalizeHist(image)
            image = 255 - image
            image = cv2.resize(image, (32, 32))
            # Add a leading channel axis: (32, 32) -> (1, 32, 32).
            images.append(np.expand_dims(image, 0).astype(np.float32))
            labels.append(digit - first_digit)

    if skipped:
        print('Skipped unreadable files: ', skipped)
    print('Total images: ', len(images))
    print('Total labels: ', len(labels))
    X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.3, random_state=17)
    return DigitDataset(X_train, y_train), DigitDataset(X_test, y_test)
def make_data(path='data', skip0=True, save_path='data/data.pt'):
    """Build the train/test split and serialize it to disk with ``torch.save``.

    Args:
        path: Root directory of the digit image folders, forwarded to
            ``make_dataset``.
        skip0: Forwarded to ``make_dataset``; see that function for how it
            affects which digits are loaded.
        save_path: File the serialized dictionary is written to.  Defaults to
            the location that used to be hard-coded, so existing callers are
            unaffected.

    The saved object is a dict with the keys ``'train'`` and ``'test'``, each
    holding a dict with ``'images'`` and ``'labels'`` taken from the
    corresponding dataset.
    """
    train_dataset, test_dataset = make_dataset(path, skip0)
    params = {
        'train': {
            'images': train_dataset.images,
            'labels': train_dataset.labels,
        },
        'test': {
            'images': test_dataset.images,
            'labels': test_dataset.labels,
        },
    }
    torch.save(params, save_path)
class DigitDataset(Dataset):
    """Map-style dataset pairing in-memory image arrays with their labels."""

    def __init__(self, images, labels):
        """Keep references to the parallel ``images`` and ``labels`` sequences."""
        self.images, self.labels = images, labels

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(tensor, label)`` for position ``idx``.

        The stored NumPy array is wrapped with ``torch.from_numpy``; the label
        is returned as stored.
        """
        return torch.from_numpy(self.images[idx]), self.labels[idx]
# Script entry point: build the dataset with the default arguments and save it.
if __name__ == "__main__":
    make_data()