-
Notifications
You must be signed in to change notification settings - Fork 0
/
classifyCatvsDog.py
119 lines (101 loc) · 4.16 KB
/
classifyCatvsDog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from __future__ import division
import cv2
import numpy as np
import glob
import os
from sklearn.cluster import KMeans, MiniBatchKMeans
import scipy.cluster.vq as vq
import pdb
import cPickle as pickle
import numpy
from sklearn import svm
import random
import os.path
"""
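Bag-of-visual-words pipeline for classifying cat vs. dog images:

1. Read in the images from the training folder
2. Compute SIFT descriptors for every image
3. Create a codebook by clustering all descriptors
4. For each image, create a histogram of codebook features
5. Save to file (the SIFT descriptors are cached as a pickle)
6. Use an SVM to classify the target images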
"""
# --- Pipeline configuration --------------------------------------------------
# Directory that is scanned for training images.
folder_name = "/Users/mm71593/classify/train"
# Number of k-means clusters, i.e. the size of the visual-word codebook.
num_clusters = 300
# Pickle file used to cache the per-image SIFT descriptors between runs.
sift_descriptors_filename = "sift_descriptors.pickle"

# --- Shared state ------------------------------------------------------------
# Every *.jpg file found directly inside folder_name.
list_of_image_names = glob.glob(os.path.join(folder_name, "*.jpg"))
# image name -> SIFT descriptors (populated by the script section below).
image_to_descriptors = dict()
# image name -> bag-of-words histogram (populated by the script section below).
all_image_histograms = dict()

# --- Feature extractor and clusterer -----------------------------------------
sift = cv2.SIFT()
kmeans = MiniBatchKMeans(
    n_clusters=num_clusters,
    batch_size=100,
    max_iter=250,
)
def count_type_in_list(mylist, mytype):
    """Count the paths in ``mylist`` whose type equals ``mytype``.

    The "type" of a path is the part of its base name that precedes the
    first ``.`` (for example ``cat`` for ``/data/cat.12.jpg``).
    """
    matches = 0
    for path in mylist:
        type_prefix = os.path.basename(path).split(".")[0]
        if type_prefix == mytype:
            matches += 1
    return matches
def get_sift_descriptors(image_name):
    """Return a random sample of at most 100 SIFT descriptors for one image.

    The image is read with OpenCV, converted to grayscale and passed to the
    module-level ``sift`` extractor.

    Args:
        image_name: Path of the image file to read.

    Returns:
        A list of descriptor rows taken from the array returned by
        ``sift.detectAndCompute``. The list is empty when the extractor
        reports no descriptors for the image.

    Raises:
        IOError: If OpenCV cannot read ``image_name``.
    """
    img = cv2.imread(image_name)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the file name instead of deep inside cvtColor.
        raise IOError("Could not read image: %s" % image_name)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _keypoints, desc = sift.detectAndCompute(gray, None)
    if desc is None or len(desc) == 0:
        # No keypoints were found, so there is nothing to sample from.
        return []
    # Return only a subset to keep the clustering input manageable.
    num_elems = min(100, len(desc))
    # Sample row indices instead of the array itself: newer Python versions
    # require random.sample's population to be a real sequence, which a
    # NumPy array is not.
    chosen_rows = random.sample(range(len(desc)), num_elems)
    return [desc[row] for row in chosen_rows]
def compute_codebook(list_of_sift_descriptors):
    """Cluster the descriptors into ``num_clusters`` groups with SciPy.

    Returns the result of ``scipy.cluster.vq.kmeans2`` unchanged, using the
    module-level ``num_clusters`` as the number of clusters.
    """
    clustering = vq.kmeans2(list_of_sift_descriptors, num_clusters)
    return clustering
def create_histogram(labels, n_clusters=None):
    """Build a normalised bag-of-words histogram from cluster labels.

    Args:
        labels: Iterable of integer cluster ids, one per descriptor.
        n_clusters: Number of clusters (histogram length). Defaults to the
            module-level ``num_clusters``.

    Returns:
        A float array of length ``n_clusters`` whose entries are the
        fraction of ``labels`` that fall in each cluster. All zeros when
        ``labels`` is empty.
    """
    if n_clusters is None:
        n_clusters = num_clusters
    labels = np.asarray(labels)
    if labels.size == 0:
        # Normalising an empty histogram would divide by zero and yield NaN.
        return np.zeros(n_clusters)
    # n_clusters bins need n_clusters + 1 edges. With only n_clusters edges
    # the two highest cluster ids would be merged into the last bin.
    bin_edges = np.arange(n_clusters + 1)
    # ``density`` replaces the ``normed`` argument, which newer NumPy releases
    # no longer accept; with unit-width bins it yields count / total.
    hist, _edges = np.histogram(labels, bins=bin_edges, density=True)
    return hist
def create_labels_matrix(image_names):
    """Derive one class label per image path.

    The label is the part of the file's base name before the first ``.``
    (for example ``dog`` for ``/data/dog.7.jpg``). The returned list has
    the same order as ``image_names``.
    """
    class_labels = []
    for path in image_names:
        file_name = os.path.basename(path)
        class_labels.append(file_name.split(".")[0])
    return class_labels
# Takes a list of image names and returns a dict mapping
# image_name -> the SIFT descriptors sampled for that image.
def build_dataset(list_of_image_names):
    """Extract SIFT descriptors for every image in ``list_of_image_names``.

    Prints the running index every 1000 images as a progress indicator and
    returns a dict keyed by image name.
    """
    descriptors_by_image = {}
    position = 0
    for image_name in list_of_image_names:
        # Progress marker for long runs.
        if position % 1000 == 0:
            print(position)
        descriptors_by_image[image_name] = get_sift_descriptors(image_name)
        position += 1
    return descriptors_by_image
# Writes the descriptors to a pickle file.
def dump_sift_descriptors_to_file(filename, image_to_descriptors_hash):
    """Pickle ``image_to_descriptors_hash`` into ``filename``.

    The file is opened in binary write mode, so existing content is replaced.
    """
    with open(filename, "wb") as pickle_out:
        writer = pickle.Pickler(pickle_out)
        writer.dump(image_to_descriptors_hash)
# Reads the descriptors back from a pickle file.
def load_sift_descriptors_from_file(filename):
    """Return the object stored in the pickle file ``filename``.

    NOTE: unpickling can execute arbitrary code, so only load cache files
    that this script wrote itself.
    """
    with open(filename, "rb") as pickle_in:
        cached = pickle.load(pickle_in)
    return cached
def classify_images(svm, list_of_image_names):
    """Predict a label for every image with the given classifier.

    For each image, SIFT descriptors are extracted, mapped to cluster ids
    with the module-level ``kmeans`` model, turned into a histogram and fed
    to ``svm.predict``.

    Args:
        svm: Fitted classifier exposing ``predict``. Inside this function
            the name shadows the imported ``sklearn.svm`` module.
        list_of_image_names: Paths of the images to classify.

    Returns:
        A list with one entry per image, in input order. Each entry is the
        result of ``svm.predict`` for that single image, or ``None`` when
        the image yielded no descriptors.
    """
    labels = []
    for image_name in list_of_image_names:
        descriptors = get_sift_descriptors(image_name)
        if descriptors is None or len(descriptors) == 0:
            # Nothing to quantise; record "no prediction" instead of failing.
            labels.append(None)
            continue
        target_hist = create_histogram(kmeans.predict(descriptors))
        # Use the classifier that was passed in (the original ignored the
        # argument and reached for the global ``clf``), and hand predict a
        # 2-D (1, n_features) array since it expects a batch of samples.
        sample = np.asarray(target_hist).reshape(1, -1)
        labels.append(svm.predict(sample))
    return labels
def test_classification(predicted_labels, trained_labels):
    """Return the fraction of predictions that equal the expected labels.

    Args:
        predicted_labels: Sequence of predicted labels.
        trained_labels: Sequence of expected labels, aligned with
            ``predicted_labels``.

    Returns:
        ``matches / len(predicted_labels)`` as a float, or ``0.0`` when
        ``predicted_labels`` is empty.
    """
    if len(predicted_labels) == 0:
        # Avoid dividing by zero when there is nothing to score.
        return 0.0
    total_correct = 0
    for predicted, expected in zip(predicted_labels, trained_labels):
        if predicted == expected:
            total_correct += 1
    # Explicit float so the result does not depend on the file-level
    # ``from __future__ import division``.
    return total_correct / float(len(predicted_labels))


# Despite its name this is a scoring helper, not a unit test. Without this
# marker pytest would collect it and fail looking for fixtures named after
# its parameters.
test_classification.__test__ = False
# --- SIFT descriptors: reuse the pickle cache when it exists -----------------
if os.path.isfile(sift_descriptors_filename):
    print("Loading sift descriptors")
    image_to_descriptors = load_sift_descriptors_from_file(sift_descriptors_filename)
else:
    print("Creating sift descriptors from images and dumping to file")
    image_to_descriptors = build_dataset(list_of_image_names)
    dump_sift_descriptors_to_file(sift_descriptors_filename, image_to_descriptors)

# Stack every image's descriptors into one matrix to be fed into kmeans.
# Entries without descriptors are skipped because they cannot be stacked.
list_of_sift_descriptors = np.vstack(
    [
        descs
        for descs in image_to_descriptors.values()
        if descs is not None and len(descs) > 0
    ]
)

# --- Fit the codebook --------------------------------------------------------
print("Setting up kmeans")
kmeans.fit(list_of_sift_descriptors)

# --- Bag-of-words histogram per image ----------------------------------------
for index, image_name in enumerate(list_of_image_names):
    if index % 1000 == 0:
        print(index)
    descriptors = image_to_descriptors.get(image_name)
    if descriptors is None or len(descriptors) == 0:
        # The cache may predate this image (stale pickle) or the image may
        # have no descriptors; either way there is nothing to quantise.
        continue
    labels = kmeans.predict(descriptors)
    all_image_histograms[image_name] = create_histogram(labels)

# Build labels and histograms from one explicit list of names so the two
# stay aligned without relying on dict .keys()/.values() ordering.
trained_image_names = list(all_image_histograms.keys())
correct_labels = create_labels_matrix(trained_image_names)
training_histograms = [all_image_histograms[name] for name in trained_image_names]

# --- SVM training ------------------------------------------------------------
print("Training svm")
clf = svm.SVC()
clf.fit(training_histograms, correct_labels)

# --- Evaluation --------------------------------------------------------------
# NOTE: predictions are made on the same images the SVM was trained on, so
# the printed value is training accuracy, not held-out accuracy.
print("Predicting labels")
predicted_labels = classify_images(clf, list_of_image_names)
trained_labels = create_labels_matrix(list_of_image_names)
accuracy = test_classification(predicted_labels, trained_labels)
print(accuracy)