diff --git a/contributed/batch_represent.py b/contributed/batch_represent.py
index 9ec448181..a8d702a32 100755
--- a/contributed/batch_represent.py
+++ b/contributed/batch_represent.py
@@ -75,7 +75,8 @@ import facenet
 import numpy as np
 from sklearn.datasets import load_files
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 from six.moves import xrange
 
 def main(args):
diff --git a/contributed/cluster.py b/contributed/cluster.py
index 6bd189976..82d941c82 100644
--- a/contributed/cluster.py
+++ b/contributed/cluster.py
@@ -27,7 +27,8 @@ from __future__ import print_function
 
 from scipy import misc
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 import numpy as np
 import os
 import sys
diff --git a/contributed/clustering.py b/contributed/clustering.py
index 9e53c2360..1f5c89fd2 100644
--- a/contributed/clustering.py
+++ b/contributed/clustering.py
@@ -1,5 +1,6 @@
 """ Face Cluster """
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 import numpy as np
 import importlib
 import argparse
@@ -190,8 +191,8 @@ def get_onedir(paths):
         for x in image_paths:
             if os.path.getsize(x)>0:
                 dataset.append(x)
-        
-    return dataset 
+
+    return dataset
 
 def main(args):
@@ -216,22 +217,22 @@ def main(args):
             #image_list, label_list = facenet.get_image_paths_and_labels(train_set)
             meta_file, ckpt_file = facenet.get_model_filenames(os.path.expanduser(args.model_dir))
-            
+
             print('Metagraph file: %s' % meta_file)
             print('Checkpoint file: %s' % ckpt_file)
             load_model(args.model_dir, meta_file, ckpt_file)
-            
+
             # Get input and output tensors
             images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
             embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
             phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
-            
+
             image_size = images_placeholder.get_shape()[1]
             print("image_size:",image_size)
             embedding_size = embeddings.get_shape()[1]
-            
+
             # Run forward pass to calculate embeddings
-            print('Runnning forward pass on images') 
+            print('Running forward pass on images')
 
             nrof_images = len(image_paths)
             nrof_batches = int(math.ceil(1.0*nrof_images / args.batch_size))
@@ -240,7 +241,7 @@ def main(args):
                 embedding_size,nrof_images,nrof_batches,emb_array,args.batch_size,image_paths)
             sorted_clusters = cluster_facial_encodings(facial_encodings)
             num_cluster = len(sorted_clusters)
-                
+
             # Copy image files to cluster folders
             for idx, cluster in enumerate(sorted_clusters):
                 #save all the cluster
diff --git a/contributed/export_embeddings.py b/contributed/export_embeddings.py
index d378c2d46..fc6b54667 100644
--- a/contributed/export_embeddings.py
+++ b/contributed/export_embeddings.py
@@ -52,7 +52,8 @@ import time
 
 from scipy import misc
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 import numpy as np
 import sys
 import os
diff --git a/contributed/face.py b/contributed/face.py
index 97b95000f..2518920dd 100644
--- a/contributed/face.py
+++ b/contributed/face.py
@@ -34,7 +34,8 @@
 import cv2
 import numpy as np
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 from scipy import misc
 
 import align.detect_face
diff --git a/contributed/predict.py b/contributed/predict.py
index 8bb10a81a..7ddadcb6b 100644
--- a/contributed/predict.py
+++ b/contributed/predict.py
@@ -29,7 +29,8 @@
 #----------------------------------------------------
 
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 import numpy as np
 import argparse
 import facenet
@@ -43,12 +44,12 @@ from six.moves import xrange
 
 def main(args):
-    
+
     images, cout_per_image, nrof_samples = load_and_align_data(args.image_files,args.image_size, args.margin, args.gpu_memory_fraction)
     with tf.Graph().as_default():
 
         with tf.Session() as sess:
-            
+
             # Load the model
             facenet.load_model(args.model)
             # Get input and output tensors
@@ -66,36 +67,36 @@ def main(args):
             predictions = model.predict_proba(emb)
             best_class_indices = np.argmax(predictions, axis=1)
             best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
-            k=0 
-            #print predictions 
+            k=0
+            #print predictions
             for i in range(nrof_samples):
                 print("\npeople in image %s :" %(args.image_files[i]))
                 for j in range(cout_per_image[i]):
                     print('%s: %.3f' % (class_names[best_class_indices[k]], best_class_probabilities[k]))
                     k+=1
-            
+
 def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction):
 
     minsize = 20 # minimum size of face
     threshold = [ 0.6, 0.7, 0.7 ]  # three steps's threshold
     factor = 0.709 # scale factor
-    
+
     print('Creating networks and loading parameters')
     with tf.Graph().as_default():
         gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
         sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
         with sess.as_default():
             pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)
-    
+
     nrof_samples = len(image_paths)
-    img_list = [] 
+    img_list = []
     count_per_image = []
     for i in xrange(nrof_samples):
         img = misc.imread(os.path.expanduser(image_paths[i]))
         img_size = np.asarray(img.shape)[0:2]
         bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
         count_per_image.append(len(bounding_boxes))
-        for j in range(len(bounding_boxes)): 
+        for j in range(len(bounding_boxes)):
             det = np.squeeze(bounding_boxes[j,0:4])
             bb = np.zeros(4, dtype=np.int32)
             bb[0] = np.maximum(det[0]-margin/2, 0)
@@ -105,17 +106,17 @@ def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction):
             cropped = img[bb[1]:bb[3],bb[0]:bb[2],:]
             aligned = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
             prewhitened = facenet.prewhiten(aligned)
-            img_list.append(prewhitened) 
+            img_list.append(prewhitened)
     images = np.stack(img_list)
     return images, count_per_image, nrof_samples
 
 def parse_arguments(argv):
     parser = argparse.ArgumentParser()
     parser.add_argument('image_files', type=str, nargs='+', help='Path(s) of the image(s)')
-    parser.add_argument('model', type=str, 
+    parser.add_argument('model', type=str,
         help='Could be either a directory containing the meta_file and ckpt_file or a model protobuf (.pb) file')
-    parser.add_argument('classifier_filename', 
-        help='Classifier model file name as a pickle (.pkl) file. ' + 
+    parser.add_argument('classifier_filename',
+        help='Classifier model file name as a pickle (.pkl) file. ' +
         'For training this is the output and for classification this is an input.')
     parser.add_argument('--image_size', type=int,
         help='Image size (height, width) in pixels.', default=160)
diff --git a/requirements.txt b/requirements.txt
index b7418c9ac..bf84bb585 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
-tensorflow==1.7
-scipy
+tensorflow-gpu == 2.2.3
+scipy == 1.2.0
 scikit-learn
-opencv-python
+opencv-python-headless == 3.4.14.53
 h5py
 matplotlib
 Pillow
diff --git a/src/align/align_dataset_mtcnn.py b/src/align/align_dataset_mtcnn.py
index 7d5e735e6..c7e708122 100644
--- a/src/align/align_dataset_mtcnn.py
+++ b/src/align/align_dataset_mtcnn.py
@@ -1,18 +1,18 @@
 """Performs face alignment and stores face thumbnails in the output directory."""
 # MIT License
-# 
+#
 # Copyright (c) 2016 David Sandberg
-# 
+#
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-# 
+#
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
-# 
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -29,7 +29,8 @@
 import sys
 import os
 import argparse
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 import numpy as np
 import facenet
 import align.detect_face
@@ -45,15 +46,15 @@ def main(args):
     src_path,_ = os.path.split(os.path.realpath(__file__))
     facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
     dataset = facenet.get_dataset(args.input_dir)
-    
+
     print('Creating networks and loading parameters')
-    
+
     with tf.Graph().as_default():
         gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
         sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
         with sess.as_default():
             pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)
-    
+
     minsize = 20 # minimum size of face
     threshold = [ 0.6, 0.7, 0.7 ]  # three steps's threshold
     factor = 0.709 # scale factor
@@ -61,7 +62,7 @@ def main(args):
     # Add a random key to the filename to allow alignment using multiple processes
     random_key = np.random.randint(0, high=99999)
     bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)
-    
+
     with open(bounding_boxes_filename, "w") as text_file:
         nrof_images_total = 0
         nrof_successfully_aligned = 0
@@ -92,7 +93,7 @@ def main(args):
                         if img.ndim == 2:
                             img = facenet.to_rgb(img)
                         img = img[:,:,0:3]
-    
+
                         bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
                         nrof_faces = bounding_boxes.shape[0]
                         if nrof_faces>0:
@@ -133,21 +134,21 @@ def main(args):
                         else:
                             print('Unable to align "%s"' % image_path)
                             text_file.write('%s\n' % (output_filename))
-                            
+
     print('Total number of images: %d' % nrof_images_total)
     print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
-            
+
 def parse_arguments(argv):
     parser = argparse.ArgumentParser()
-    
+
     parser.add_argument('input_dir', type=str, help='Directory with unaligned images.')
     parser.add_argument('output_dir', type=str, help='Directory with aligned face thumbnails.')
     parser.add_argument('--image_size', type=int,
         help='Image size (height, width) in pixels.', default=182)
     parser.add_argument('--margin', type=int,
         help='Margin for the crop around the bounding box (height, width) in pixels.', default=44)
-    parser.add_argument('--random_order', 
+    parser.add_argument('--random_order',
         help='Shuffles the order of images to enable alignment using multiple processes.', action='store_true')
     parser.add_argument('--gpu_memory_fraction', type=float,
         help='Upper bound on the amount of GPU memory that will be used by the process.', default=1.0)
diff --git a/src/align/detect_face.py b/src/align/detect_face.py
index 7f98ca7fb..13a473c7d 100644
--- a/src/align/detect_face.py
+++ b/src/align/detect_face.py
@@ -2,19 +2,19 @@
 https://github.com/kpzhang93/MTCNN_face_detection_alignment
 """
 # MIT License
-# 
+#
 # Copyright (c) 2016 David Sandberg
-# 
+#
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-# 
+#
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
-# 
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -29,7 +29,8 @@ from six import string_types, iteritems
 
 import numpy as np
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 #from math import floor
 import cv2
 import os
@@ -82,7 +83,7 @@ def load(self, data_path, session, ignore_missing=False):
             session: The current TensorFlow session
             ignore_missing: If true, serialized weights for missing layers are ignored.
""" - data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member + data_dict = np.load(data_path, encoding='latin1', allow_pickle=True).item() #pylint: disable=no-member for op_name in data_dict: with tf.variable_scope(op_name, reuse=True): @@ -212,7 +213,7 @@ def softmax(self, target, axis, name=None): normalize = tf.reduce_sum(target_exp, axis, keepdims=True) softmax = tf.div(target_exp, normalize, name) return softmax - + class PNet(Network): def setup(self): (self.feed('data') #pylint: disable=no-value-for-parameter, no-member @@ -228,7 +229,7 @@ def setup(self): (self.feed('PReLU3') #pylint: disable=no-value-for-parameter .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) - + class RNet(Network): def setup(self): (self.feed('data') #pylint: disable=no-value-for-parameter, no-member @@ -289,7 +290,7 @@ def create_mtcnn(sess, model_path): data = tf.placeholder(tf.float32, (None,48,48,3), 'input') onet = ONet({'data':data}) onet.load(os.path.join(model_path, 'det3.npy'), sess) - + pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) @@ -329,9 +330,9 @@ def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): out = pnet(img_y) out0 = np.transpose(out[0], (0,2,1,3)) out1 = np.transpose(out[1], (0,2,1,3)) - + boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) - + # inter-scale nms pick = nms(boxes.copy(), 0.5, 'Union') if boxes.size>0 and pick.size>0: @@ -414,7 +415,7 @@ def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): pick = nms(total_boxes.copy(), 0.7, 'Min') total_boxes = total_boxes[pick,:] points = points[:,pick] - + return total_boxes, points @@ -656,7 +657,7 @@ def bbreg(boundingbox,reg): b4 = boundingbox[:,3]+reg[:,3]*h boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) return boundingbox - + def generateBoundingBox(imap, reg, scale, t): """Use heatmap to generate bounding boxes""" stride=2 @@ -682,7 +683,7 @@ def generateBoundingBox(imap, reg, scale, t): q2 = np.fix((stride*bb+cellsize-1+1)/scale) boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) return boundingbox, reg - + # function pick = nms(boxes,threshold,type) def nms(boxes, threshold, method): if boxes.size==0: @@ -736,7 +737,7 @@ def pad(total_boxes, w, h): tmp = np.where(ex>w) edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) ex[tmp] = w - + tmp = np.where(ey>h) edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) ey[tmp] = h @@ -748,7 +749,7 @@ def pad(total_boxes, w, h): tmp = np.where(y<1) dy.flat[tmp] = np.expand_dims(2-y[tmp],1) y[tmp] = 1 - + return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph # function [bboxA] = rerec(bboxA) diff --git a/src/calculate_filtering_metrics.py b/src/calculate_filtering_metrics.py index f60b9ae4d..39d1a4314 100644 --- a/src/calculate_filtering_metrics.py +++ b/src/calculate_filtering_metrics.py @@ -1,19 +1,19 @@ """Calculate filtering metrics for a dataset and store in a .hdf file. 
""" # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -26,7 +26,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import argparse import facenet @@ -40,30 +41,30 @@ def main(args): dataset = facenet.get_dataset(args.dataset_dir) - + with tf.Graph().as_default(): - + # Get a list of image paths and their labels image_list, label_list = facenet.get_image_paths_and_labels(dataset) nrof_images = len(image_list) image_indices = range(nrof_images) image_batch, label_batch = facenet.read_and_augment_data(image_list, - image_indices, args.image_size, args.batch_size, None, + image_indices, args.image_size, args.batch_size, None, False, False, False, nrof_preprocess_threads=4, shuffle=False) - + model_exp = os.path.expanduser(args.model_file) with gfile.FastGFile(model_exp,'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) input_map={'input':image_batch, 'phase_train':False} tf.import_graph_def(graph_def, input_map=input_map, name='net') - + embeddings = tf.get_default_graph().get_tensor_by_name("net/embeddings:0") with tf.Session() as sess: tf.train.start_queue_runners(sess=sess) - + embedding_size = int(embeddings.get_shape()[1]) nrof_batches = int(math.ceil(nrof_images / args.batch_size)) nrof_classes = len(dataset) @@ -100,18 +101,18 @@ def main(args): idx_array = np.delete(idx_array, cls_idx, axis=0) lab_array = np.delete(lab_array, cls_idx, axis=0) - + print('Batch %d in %.3f seconds' % (i, time.time()-t)) - + print('Writing filtering data to %s' % args.data_file_name) mdict = {'class_names':class_names, 'image_list':image_list, 'label_list':label_list, 'distance_to_center':distance_to_center } with h5py.File(args.data_file_name, 'w') as f: for key, value in iteritems(mdict): f.create_dataset(key, data=value) - + def parse_arguments(argv): parser = argparse.ArgumentParser() - + parser.add_argument('dataset_dir', type=str, help='Path to the directory containing aligned dataset.') parser.add_argument('model_file', type=str, diff --git a/src/classifier.py b/src/classifier.py index 749db4d6b..7a2b713eb 100644 --- a/src/classifier.py +++ b/src/classifier.py @@ -1,19 +1,19 @@ """An example of how to use your own dataset to train a classifier that recognizes people. 
""" # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -26,7 +26,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import argparse import facenet @@ -37,13 +38,13 @@ from sklearn.svm import SVC def main(args): - + with tf.Graph().as_default(): - + with tf.Session() as sess: - + np.random.seed(seed=args.seed) - + if args.use_split_dataset: dataset_tmp = facenet.get_dataset(args.data_dir) train_set, test_set = split_dataset(dataset_tmp, args.min_nrof_images_per_class, args.nrof_train_images_per_class) @@ -56,24 +57,24 @@ def main(args): # Check that there are at least one training image per class for cls in dataset: - assert(len(cls.image_paths)>0, 'There must be at least one image for each class in the dataset') + assert(len(cls.image_paths)>0, 'There must be at least one image for each class in the dataset') + - paths, labels = facenet.get_image_paths_and_labels(dataset) - + print('Number of classes: %d' % len(dataset)) print('Number of images: %d' % len(paths)) - + # Load the model print('Loading feature extraction model') facenet.load_model(args.model) - + # Get input and output tensors images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") embedding_size = embeddings.get_shape()[1] - + # Run forward pass to calculate embeddings print('Calculating features for images') nrof_images = len(paths) @@ -86,7 +87,7 @@ def main(args): images = facenet.load_data(paths_batch, False, False, args.image_size) feed_dict = { images_placeholder:images, phase_train_placeholder:False } emb_array[start_index:end_index,:] = sess.run(embeddings, feed_dict=feed_dict) - + classifier_filename_exp = os.path.expanduser(args.classifier_filename) if (args.mode=='TRAIN'): @@ -94,7 +95,7 @@ def main(args): print('Training classifier') model = SVC(kernel='linear', probability=True) model.fit(emb_array, labels) - + # Create a list of class names class_names = [ cls.name.replace('_', ' ') for cls in dataset] @@ -102,7 +103,7 @@ def main(args): with open(classifier_filename_exp, 'wb') as outfile: pickle.dump((model, class_names), outfile) print('Saved classifier model to file "%s"' % classifier_filename_exp) - + elif (args.mode=='CLASSIFY'): # Classify images print('Testing classifier') @@ -114,14 +115,14 @@ def main(args): predictions = model.predict_proba(emb_array) best_class_indices = np.argmax(predictions, axis=1) best_class_probabilities = predictions[np.arange(len(best_class_indices)), 
best_class_indices] - + for i in range(len(best_class_indices)): print('%4d %s: %.3f' % (i, class_names[best_class_indices[i]], best_class_probabilities[i])) - + accuracy = np.mean(np.equal(best_class_indices, labels)) print('Accuracy: %.3f' % accuracy) - - + + def split_dataset(dataset, min_nrof_images_per_class, nrof_train_images_per_class): train_set = [] test_set = [] @@ -134,22 +135,22 @@ def split_dataset(dataset, min_nrof_images_per_class, nrof_train_images_per_clas test_set.append(facenet.ImageClass(cls.name, paths[nrof_train_images_per_class:])) return train_set, test_set - + def parse_arguments(argv): parser = argparse.ArgumentParser() - + parser.add_argument('mode', type=str, choices=['TRAIN', 'CLASSIFY'], - help='Indicates if a new classifier should be trained or a classification ' + + help='Indicates if a new classifier should be trained or a classification ' + 'model should be used for classification', default='CLASSIFY') parser.add_argument('data_dir', type=str, help='Path to the data directory containing aligned LFW face patches.') - parser.add_argument('model', type=str, + parser.add_argument('model', type=str, help='Could be either a directory containing the meta_file and ckpt_file or a model protobuf (.pb) file') - parser.add_argument('classifier_filename', - help='Classifier model file name as a pickle (.pkl) file. ' + + parser.add_argument('classifier_filename', + help='Classifier model file name as a pickle (.pkl) file. ' + 'For training this is the output and for classification this is an input.') - parser.add_argument('--use_split_dataset', - help='Indicates that the dataset specified by data_dir should be split into a training and test set. ' + + parser.add_argument('--use_split_dataset', + help='Indicates that the dataset specified by data_dir should be split into a training and test set. ' + 'Otherwise a separate test set can be specified using the test_data_dir option.', action='store_true') parser.add_argument('--test_data_dir', type=str, help='Path to the test data directory containing aligned images used for testing.') @@ -163,7 +164,7 @@ def parse_arguments(argv): help='Only include classes with at least this number of images in the dataset', default=20) parser.add_argument('--nrof_train_images_per_class', type=int, help='Use this number of images from each class for training and the rest for testing', default=10) - + return parser.parse_args(argv) if __name__ == '__main__': diff --git a/src/compare.py b/src/compare.py index bc53cc421..fb2e25ce2 100644 --- a/src/compare.py +++ b/src/compare.py @@ -1,19 +1,19 @@ """Performs face alignment and calculates L2 distance between the embeddings of images.""" # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -27,7 +27,8 @@ from __future__ import print_function from scipy import misc -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import sys import os @@ -42,10 +43,10 @@ def main(args): with tf.Graph().as_default(): with tf.Session() as sess: - + # Load the model facenet.load_model(args.model) - + # Get input and output tensors images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") @@ -54,14 +55,14 @@ def main(args): # Run forward pass to calculate embeddings feed_dict = { images_placeholder: images, phase_train_placeholder:False } emb = sess.run(embeddings, feed_dict=feed_dict) - + nrof_images = len(args.image_files) print('Images:') for i in range(nrof_images): print('%1d: %s' % (i, args.image_files[i])) print('') - + # Print distance matrix print('Distance matrix') print(' ', end='') @@ -74,21 +75,21 @@ def main(args): dist = np.sqrt(np.sum(np.square(np.subtract(emb[i,:], emb[j,:])))) print(' %1.4f ' % dist, end='') print('') - - + + def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction): minsize = 20 # minimum size of face threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold factor = 0.709 # scale factor - + print('Creating networks and loading parameters') with tf.Graph().as_default(): gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) with sess.as_default(): pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None) - + tmp_image_paths=copy.copy(image_paths) img_list = [] for image in tmp_image_paths: @@ -114,8 +115,8 @@ def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction): def parse_arguments(argv): parser = argparse.ArgumentParser() - - parser.add_argument('model', type=str, + + parser.add_argument('model', type=str, help='Could be either a directory containing the meta_file and ckpt_file or a model protobuf (.pb) file') parser.add_argument('image_files', type=str, nargs='+', help='Images to compare') parser.add_argument('--image_size', type=int, diff --git a/src/facenet.py b/src/facenet.py index 0e056765a..d7139a36d 100644 --- a/src/facenet.py +++ b/src/facenet.py @@ -1,19 +1,19 @@ """Functions for building the face recognition network. """ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -29,7 +29,8 @@ import os from subprocess import Popen, PIPE -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np from scipy import misc from sklearn.model_selection import KFold @@ -43,24 +44,24 @@ def triplet_loss(anchor, positive, negative, alpha): """Calculate the triplet loss according to the FaceNet paper - + Args: anchor: the embeddings for the anchor images. positive: the embeddings for the positive images. negative: the embeddings for the negative images. - + Returns: the triplet loss according to the FaceNet paper as a float tensor. """ with tf.variable_scope('triplet_loss'): pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1) neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1) - + basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha) loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0) - + return loss - + def center_loss(features, label, alfa, nrof_classes): """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" (http://ydwen.github.io/papers/WenECCV16.pdf) @@ -93,7 +94,7 @@ def shuffle_examples(image_paths, labels): def random_rotate_image(image): angle = np.random.uniform(low=-10.0, high=10.0) return misc.imrotate(image, angle, 'bicubic') - + # 1: Random rotate 2: Random crop 4: Random flip 8: Fixed image standardization 16: Flip RANDOM_ROTATE = 1 RANDOM_CROP = 2 @@ -109,10 +110,10 @@ def create_input_pipeline(input_queue, image_size, nrof_preprocess_threads, batc file_contents = tf.read_file(filename) image = tf.image.decode_image(file_contents, 3) image = tf.cond(get_control_flag(control[0], RANDOM_ROTATE), - lambda:tf.py_func(random_rotate_image, [image], tf.uint8), + lambda:tf.py_func(random_rotate_image, [image], tf.uint8), lambda:tf.identity(image)) - image = tf.cond(get_control_flag(control[0], RANDOM_CROP), - lambda:tf.random_crop(image, image_size + (3,)), + image = tf.cond(get_control_flag(control[0], RANDOM_CROP), + lambda:tf.random_crop(image, image_size + (3,)), lambda:tf.image.resize_image_with_crop_or_pad(image, image_size[0], image_size[1])) image = tf.cond(get_control_flag(control[0], RANDOM_FLIP), lambda:tf.image.random_flip_left_right(image), @@ -129,22 +130,22 @@ def create_input_pipeline(input_queue, image_size, nrof_preprocess_threads, batc images_and_labels_list.append([images, label]) image_batch, label_batch = tf.train.batch_join( - images_and_labels_list, batch_size=batch_size_placeholder, + images_and_labels_list, batch_size=batch_size_placeholder, shapes=[image_size + (3,), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * 100, allow_smaller_final_batch=True) - + return image_batch, label_batch def get_control_flag(control, field): return tf.equal(tf.mod(tf.floor_div(control, field), 2), 1) - + def _add_loss_summaries(total_loss): """Add summaries for losses. - + Generates moving average for all losses and associated summaries for visualizing the performance of the network. - + Args: total_loss: Total loss from loss(). Returns: @@ -154,7 +155,7 @@ def _add_loss_summaries(total_loss): loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') losses = tf.get_collection('losses') loss_averages_op = loss_averages.apply(losses + [total_loss]) - + # Attach a scalar summmary to all individual losses and the total loss; do the # same for the averaged version of the losses. 
     for l in losses + [total_loss]:
@@ -162,7 +163,7 @@
         # as the original loss name.
         tf.summary.scalar(l.op.name +' (raw)', l)
         tf.summary.scalar(l.op.name, loss_averages.average(l))
-  
+
     return loss_averages_op
 
 def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True):
@@ -183,31 +184,31 @@
             opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True)
         else:
             raise ValueError('Invalid optimization algorithm')
-    
+
         grads = opt.compute_gradients(total_loss, update_gradient_vars)
-        
+
     # Apply gradients.
     apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
-  
+
     # Add histograms for trainable variables.
     if log_histograms:
         for var in tf.trainable_variables():
             tf.summary.histogram(var.op.name, var)
-   
+
     # Add histograms for gradients.
     if log_histograms:
        for grad, var in grads:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/gradients', grad)
-  
+
     # Track the moving averages of all trainable variables.
     variable_averages = tf.train.ExponentialMovingAverage(
         moving_average_decay, global_step)
     variables_averages_op = variable_averages.apply(tf.trainable_variables())
-  
+
     with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
         train_op = tf.no_op(name='train')
-  
+
     return train_op
 
 def prewhiten(x):
@@ -215,7 +216,7 @@
     std = np.std(x)
     std_adj = np.maximum(std, 1.0/np.sqrt(x.size))
     y = np.multiply(np.subtract(x, mean), 1/std_adj)
-    return y 
+    return y
 
 def crop(image, random_crop, image_size):
     if image.shape[1]>image_size:
@@ -228,7 +229,7 @@
             (h, v) = (0,0)
         image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:]
     return image
-  
+
 def flip(image, random_flip):
     if random_flip and np.random.choice([True, False]):
         image = np.fliplr(image)
@@ -239,7 +240,7 @@
     ret = np.empty((w, h, 3), dtype=np.uint8)
     ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
     return ret
-  
+
 def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True):
     nrof_samples = len(image_paths)
     images = np.zeros((nrof_samples, image_size, image_size, 3))
@@ -307,13 +308,13 @@ class ImageClass():
     def __init__(self, name, image_paths):
         self.name = name
         self.image_paths = image_paths
-  
+
     def __str__(self):
         return self.name + ', ' + str(len(self.image_paths)) + ' images'
-  
+
     def __len__(self):
         return len(self.image_paths)
-  
+
 def get_dataset(path, has_class_directories=True):
     dataset = []
     path_exp = os.path.expanduser(path)
@@ -326,7 +327,7 @@
         facedir = os.path.join(path_exp, class_name)
         image_paths = get_image_paths(facedir)
         dataset.append(ImageClass(class_name, image_paths))
-  
+
     return dataset
 
 def get_image_paths(facedir):
@@ -335,7 +336,7 @@
         images = os.listdir(facedir)
         image_paths = [os.path.join(facedir,img) for img in images]
     return image_paths
-  
+
 def split_dataset(dataset, split_ratio, min_nrof_images_per_class, mode):
     if mode=='SPLIT_CLASSES':
         nrof_classes = len(dataset)
@@ -374,13 +375,13 @@ def load_model(model, input_map=None):
     else:
         print('Model directory: %s' % model_exp)
         meta_file, ckpt_file = get_model_filenames(model_exp)
-        
+
         print('Metagraph file: %s' % meta_file)
         print('Checkpoint file: %s' % ckpt_file)
-      
+
         saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file), input_map=input_map)
         saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
-    
+
 def get_model_filenames(model_dir):
     files = os.listdir(model_dir)
     meta_files = [s for s in files if s.endswith('.meta')]
@@ -404,7 +405,7 @@ def get_model_filenames(model_dir):
             max_step = step
             ckpt_file = step_str.groups()[0]
     return meta_file, ckpt_file
-  
+
 def distance(embeddings1, embeddings2, distance_metric=0):
     if distance_metric==0:
         # Euclidian distance
@@ -417,8 +418,8 @@
         similarity = dot / norm
         dist = np.arccos(similarity) / math.pi
     else:
-        raise 'Undefined distance metric %d' % distance_metric 
-        
+        raise ValueError('Undefined distance metric %d' % distance_metric)
+
     return dist
 
 def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False):
@@ -427,20 +428,20 @@
     nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
     nrof_thresholds = len(thresholds)
     k_fold = KFold(n_splits=nrof_folds, shuffle=False)
-    
+
     tprs = np.zeros((nrof_folds,nrof_thresholds))
     fprs = np.zeros((nrof_folds,nrof_thresholds))
     accuracy = np.zeros((nrof_folds))
-    
+
     indices = np.arange(nrof_pairs)
-    
+
     for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
         if subtract_mean:
             mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
         else:
             mean = 0.0
         dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)
-        
+
         # Find the best threshold for the fold
         acc_train = np.zeros((nrof_thresholds))
         for threshold_idx, threshold in enumerate(thresholds):
@@ -449,7 +450,7 @@
         for threshold_idx, threshold in enumerate(thresholds):
             tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
         _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])
-          
+
     tpr = np.mean(tprs,0)
     fpr = np.mean(fprs,0)
     return tpr, fpr, accuracy
@@ -460,33 +461,33 @@ def calculate_accuracy(threshold, dist, actual_issame):
     fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
     tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
     fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))
-  
+
     tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn)
     fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn)
     acc = float(tp+tn)/dist.size
     return tpr, fpr, acc
-  
+
 def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False):
     assert(embeddings1.shape[0] == embeddings2.shape[0])
     assert(embeddings1.shape[1] == embeddings2.shape[1])
     nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
     nrof_thresholds = len(thresholds)
     k_fold = KFold(n_splits=nrof_folds, shuffle=False)
-    
+
     val = np.zeros(nrof_folds)
     far = np.zeros(nrof_folds)
-    
+
     indices = np.arange(nrof_pairs)
-    
+
     for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
         if subtract_mean:
             mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
         else:
             mean = 0.0
         dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)
-      
+
         # Find the threshold that gives FAR = far_target
         far_train = np.zeros(nrof_thresholds)
         for threshold_idx, threshold in enumerate(thresholds):
@@ -496,9 +497,9 @@ def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_targe
             threshold = f(far_target)
         else:
             threshold = 0.0
-    
+
         val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])
-  
+
     val_mean = np.mean(val)
     far_mean = np.mean(far)
     val_std = np.std(val)
@@ -524,7 +525,7 @@ def store_revision_info(src_path, output_dir, arg_string):
         git_hash = stdout.strip()
     except OSError as e:
         git_hash = ' '.join(cmd) + ': ' + e.strerror
-  
+
     try:
         # Get local changes
         cmd = ['git', 'diff', 'HEAD']
@@ -533,7 +534,7 @@ def store_revision_info(src_path, output_dir, arg_string):
         git_diff = stdout.strip()
     except OSError as e:
         git_diff = ' '.join(cmd) + ': ' + e.strerror
-  
+
     # Store a text file in the log directory
     rev_info_filename = os.path.join(output_dir, 'revision_info.txt')
     with open(rev_info_filename, "w") as text_file:
diff --git a/src/freeze_graph.py b/src/freeze_graph.py
index 3584c186e..8032dee1d 100644
--- a/src/freeze_graph.py
+++ b/src/freeze_graph.py
@@ -2,19 +2,19 @@
 and exports the model as a graphdef protobuf
 """
 # MIT License
-# 
+#
 # Copyright (c) 2016 David Sandberg
-# 
+#
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-# 
+#
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
-# 
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -28,7 +28,8 @@
 from __future__ import print_function
 
 from tensorflow.python.framework import graph_util
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 import argparse
 import os
 import sys
@@ -41,7 +42,7 @@ def main(args):
             # Load the model metagraph and checkpoint
             print('Model directory: %s' % args.model_dir)
             meta_file, ckpt_file = facenet.get_model_filenames(os.path.expanduser(args.model_dir))
-            
+
             print('Metagraph file: %s' % meta_file)
             print('Checkpoint file: %s' % ckpt_file)
@@ -50,10 +51,10 @@ def main(args):
             tf.get_default_session().run(tf.global_variables_initializer())
             tf.get_default_session().run(tf.local_variables_initializer())
             saver.restore(tf.get_default_session(), os.path.join(model_dir_exp, ckpt_file))
-            
+
             # Retrieve the protobuf graph definition and fix the batch norm nodes
             input_graph_def = sess.graph.as_graph_def()
-            
+
             # Freeze the graph def
             output_graph_def = freeze_graph_def(sess, input_graph_def, 'embeddings,label_batch')
 
@@ -61,7 +62,7 @@ def main(args):
         with tf.gfile.GFile(args.output_file, 'wb') as f:
             f.write(output_graph_def.SerializeToString())
         print("%d ops in the final graph: %s" % (len(output_graph_def.node), args.output_file))
-        
+
 def freeze_graph_def(sess, input_graph_def, output_node_names):
     for node in input_graph_def.node:
         if node.op == 'RefSwitch':
@@ -75,11 +76,11 @@ def freeze_graph_def(sess, input_graph_def, output_node_names):
         elif node.op == 'AssignAdd':
             node.op = 'Add'
             if 'use_locking' in node.attr: del node.attr['use_locking']
-    
+
    # Get the list of important nodes
    whitelist_names = []
    for node in input_graph_def.node:
-        if (node.name.startswith('InceptionResnet') or node.name.startswith('embeddings') or 
+        if (node.name.startswith('InceptionResnet') or node.name.startswith('embeddings') or
                 node.name.startswith('image_batch') or node.name.startswith('label_batch') or
                 node.name.startswith('phase_train') or node.name.startswith('Logits')):
             whitelist_names.append(node.name)
@@ -89,13 +90,13 @@ def freeze_graph_def(sess, input_graph_def, output_node_names):
         sess, input_graph_def, output_node_names.split(","),
         variable_names_whitelist=whitelist_names)
     return output_graph_def
-  
+
 def parse_arguments(argv):
     parser = argparse.ArgumentParser()
-    
-    parser.add_argument('model_dir', type=str, 
+
+    parser.add_argument('model_dir', type=str,
         help='Directory containing the metagraph (.meta) file and the checkpoint (ckpt) file containing model parameters')
-    parser.add_argument('output_file', type=str, 
+    parser.add_argument('output_file', type=str,
         help='Filename for the exported graphdef protobuf (.pb)')
     return parser.parse_args(argv)
diff --git a/src/generative/calculate_attribute_vectors.py b/src/generative/calculate_attribute_vectors.py
index 8fe3ead78..9c848b9ef 100644
--- a/src/generative/calculate_attribute_vectors.py
+++ b/src/generative/calculate_attribute_vectors.py
@@ -1,17 +1,17 @@
 # MIT License
-# 
+#
 # Copyright (c) 2017 David Sandberg
-# 
+#
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-# 
+#
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
-# 
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -20,14 +20,15 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-"""Calculate average latent variables (here called attribute vectors) 
+"""Calculate average latent variables (here called attribute vectors)
 for the different attributes in CelebA
 """
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
+tf.disable_v2_behavior()
 import sys
 import argparse
 import importlib
@@ -40,23 +41,23 @@ from six import iteritems
 
 def main(args):
-    
+
     img_mean = np.array([134.10714722, 102.52040863, 87.15436554])
     img_stddev = np.sqrt(np.array([3941.30175781, 2856.94287109, 2519.35791016]))
-    
+
     vae_checkpoint = os.path.expanduser(args.vae_checkpoint)
-    
+
     fields, attribs_dict = read_annotations(args.annotations_filename)
-    
+
     vae_def = importlib.import_module(args.vae_def)
     vae = vae_def.Vae(args.latent_var_size)
     gen_image_size = vae.get_image_size()
 
     with tf.Graph().as_default():
         tf.set_random_seed(args.seed)
-        
+
         image_list = facenet.get_image_paths(os.path.expanduser(args.data_dir))
-        
+
         # Get attributes for images
         nrof_attributes = len(fields)
         attribs_list = []
@@ -65,11 +66,11 @@ def main(args):
             attr = attribs_dict[key]
             assert len(attr)==nrof_attributes
             attribs_list.append(attr)
-            
+
         # Create the input queue
         index_list = range(len(image_list))
-        input_queue = tf.train.slice_input_producer([image_list, attribs_list, index_list], num_epochs=1, shuffle=False)
-        
+        input_queue = tf.train.slice_input_producer([image_list, attribs_list, index_list], num_epochs=1, shuffle=False)
+
         nrof_preprocess_threads = 4
         image_per_thread = []
         for _ in range(nrof_preprocess_threads):
@@ -83,29 +84,29 @@ def main(args):
             attrib.set_shape((nrof_attributes,))
             image = tf.cast(image, tf.float32)
             image_per_thread.append([image, attrib, input_queue[2]])
-    
+
         images, attribs, indices = tf.train.batch_join(
-            image_per_thread, batch_size=args.batch_size, 
+            image_per_thread, batch_size=args.batch_size,
             shapes=[(args.image_size, args.image_size, 3), (nrof_attributes,), ()], enqueue_many=False,
             capacity=4 * nrof_preprocess_threads * args.batch_size,
             allow_smaller_final_batch=True)
-        
+
         # Normalize
         images_norm = (images-img_mean) / img_stddev
 
-        # Resize to appropriate size for the encoder 
+        # Resize to appropriate size for the encoder
         images_norm_resize = tf.image.resize_images(images_norm, (gen_image_size,gen_image_size))
-        
+
         # Create encoder network
         mean, log_variance = vae.encoder(images_norm_resize, True)
-        
+
         epsilon = tf.random_normal((tf.shape(mean)[0], args.latent_var_size))
         std = tf.exp(log_variance/2)
         latent_var = mean + epsilon * std
-        
+
         # Create a saver
         saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)
-        
+
         # Start running operations on the Graph
         gpu_memory_fraction = 1.0
         gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
@@ -114,14 +115,14 @@ def main(args):
             sess.run(tf.local_variables_initializer())
             coord = tf.train.Coordinator()
             tf.train.start_queue_runners(coord=coord, sess=sess)
-            
+
             with sess.as_default():
-    
+
                 if vae_checkpoint:
                     print('Restoring VAE checkpoint: %s' % vae_checkpoint)
                     saver.restore(sess, vae_checkpoint)
-         
+
                 nrof_images = len(image_list)
                 nrof_batches = int(math.ceil(len(image_list) / args.batch_size))
                 latent_vars = np.zeros((nrof_images, args.latent_var_size))
@@ -135,7 +136,7 @@ def main(args):
                     print('Batch %d/%d: %.3f seconds' % (i+1, nrof_batches, duration))
                 # NOTE: This will print the 'Out of range' warning if the last batch is not full,
                 # as described by https://github.com/tensorflow/tensorflow/issues/8330
-        
+
                 # Calculate average change in the latent variable when each attribute changes
                 attribute_vectors = np.zeros((nrof_attributes, args.latent_var_size), np.float32)
                 for i in range(nrof_attributes):
@@ -144,18 +145,18 @@ def main(args):
                     pos_avg = np.mean(latent_vars[pos_idx,:], 0)
                     neg_avg = np.mean(latent_vars[neg_idx,:], 0)
                     attribute_vectors[i,:] = pos_avg - neg_avg
-            
+
                 filename = os.path.expanduser(args.output_filename)
                 print('Writing attribute vectors, latent variables and attributes to %s' % filename)
-                mdict = {'latent_vars':latent_vars, 'attributes':attributes, 
+                mdict = {'latent_vars':latent_vars, 'attributes':attributes,
                     'fields':fields, 'attribute_vectors':attribute_vectors }
                 with h5py.File(filename, 'w') as f:
                     for key, value in iteritems(mdict):
                         f.create_dataset(key, data=value)
-    
-  
+
+
 def read_annotations(filename):
-    attribs = {} 
+    attribs = {}
     with open(filename, 'r') as f:
         for i, line in enumerate(f.readlines()):
             if i==0:
@@ -171,9 +172,9 @@ def read_annotations(filename):
 
 def parse_arguments(argv):
     parser = argparse.ArgumentParser()
-    
+
     parser.add_argument('vae_def', type=str,
-        help='Model definition for the variational autoencoder. Points to a module containing the definition.', 
+        help='Model definition for the variational autoencoder. Points to a module containing the definition.',
         default='src.generative.models.dfc_vae')
     parser.add_argument('vae_checkpoint', type=str,
         help='Checkpoint file of a pre-trained variational autoencoder.')
@@ -194,7 +195,7 @@ def parse_arguments(argv):
         help='Random seed.', default=666)
 
     return parser.parse_args(argv)
-  
-  
+
+
 if __name__ == '__main__':
     main(parse_arguments(sys.argv[1:]))
diff --git a/src/generative/models/dfc_vae.py b/src/generative/models/dfc_vae.py
index b4450f2da..44e9d434b 100644
--- a/src/generative/models/dfc_vae.py
+++ b/src/generative/models/dfc_vae.py
@@ -1,17 +1,17 @@
 # MIT License
-# 
+#
 # Copyright (c) 2017 David Sandberg
-# 
+#
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-# 
+#
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
-# 
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -20,7 +20,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
-"""Variational autoencoder based on the paper +"""Variational autoencoder based on the paper 'Deep Feature Consistent Variational Autoencoder' (https://arxiv.org/pdf/1610.00291.pdf) """ @@ -29,16 +29,17 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import tensorflow.contrib.slim as slim import generative.models.vae_base # @UnresolvedImport class Vae(generative.models.vae_base.Vae): - + def __init__(self, latent_variable_dim): super(Vae, self).__init__(latent_variable_dim, 64) - + def encoder(self, images, is_training): activation_fn = leaky_relu # tf.nn.relu weight_decay = 0.0 @@ -58,10 +59,10 @@ def encoder(self, images, is_training): fc1 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_1') fc2 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_2') return fc1, fc2 - + def decoder(self, latent_var, is_training): activation_fn = leaky_relu # tf.nn.relu - weight_decay = 0.0 + weight_decay = 0.0 with tf.variable_scope('decoder'): with slim.arg_scope([slim.batch_norm], is_training=is_training): @@ -72,21 +73,20 @@ def decoder(self, latent_var, is_training): normalizer_params=self.batch_norm_params): net = slim.fully_connected(latent_var, 4096, activation_fn=None, normalizer_fn=None, scope='Fc_1') net = tf.reshape(net, [-1,4,4,256], name='Reshape') - + net = tf.image.resize_nearest_neighbor(net, size=(8,8), name='Upsample_1') net = slim.conv2d(net, 128, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_1') - + net = tf.image.resize_nearest_neighbor(net, size=(16,16), name='Upsample_2') net = slim.conv2d(net, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2') - + net = tf.image.resize_nearest_neighbor(net, size=(32,32), name='Upsample_3') net = slim.conv2d(net, 32, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_3') - + net = tf.image.resize_nearest_neighbor(net, size=(64,64), name='Upsample_4') net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=None, scope='Conv2d_4') - + return net - + def leaky_relu(x): return tf.maximum(0.1*x,x) - \ No newline at end of file diff --git a/src/generative/models/dfc_vae_large.py b/src/generative/models/dfc_vae_large.py index aa8e8b716..70d647b0e 100644 --- a/src/generative/models/dfc_vae_large.py +++ b/src/generative/models/dfc_vae_large.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2017 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-"""Variational autoencoder based on the paper +"""Variational autoencoder based on the paper 'Deep Feature Consistent Variational Autoencoder' (https://arxiv.org/pdf/1610.00291.pdf) but with a larger image size (128x128 pixels) """ @@ -29,17 +29,18 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import tensorflow.contrib.slim as slim import generative.models.vae_base # @UnresolvedImport class Vae(generative.models.vae_base.Vae): - + def __init__(self, latent_variable_dim): super(Vae, self).__init__(latent_variable_dim, 128) - - + + def encoder(self, images, is_training): activation_fn = leaky_relu # tf.nn.relu weight_decay = 0.0 @@ -60,10 +61,10 @@ def encoder(self, images, is_training): fc1 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_1') fc2 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_2') return fc1, fc2 - + def decoder(self, latent_var, is_training): activation_fn = leaky_relu # tf.nn.relu - weight_decay = 0.0 + weight_decay = 0.0 with tf.variable_scope('decoder'): with slim.arg_scope([slim.batch_norm], is_training=is_training): @@ -74,22 +75,22 @@ def decoder(self, latent_var, is_training): normalizer_params=self.batch_norm_params): net = slim.fully_connected(latent_var, 4096, activation_fn=None, normalizer_fn=None, scope='Fc_1') net = tf.reshape(net, [-1,4,4,256], name='Reshape') - + net = tf.image.resize_nearest_neighbor(net, size=(8,8), name='Upsample_1') net = slim.conv2d(net, 128, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_1') - + net = tf.image.resize_nearest_neighbor(net, size=(16,16), name='Upsample_2') net = slim.conv2d(net, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2') - + net = tf.image.resize_nearest_neighbor(net, size=(32,32), name='Upsample_3') net = slim.conv2d(net, 32, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_3') - + net = tf.image.resize_nearest_neighbor(net, size=(64,64), name='Upsample_4') net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_4') - + net = tf.image.resize_nearest_neighbor(net, size=(128,128), name='Upsample_5') net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=None, scope='Conv2d_5') return net def leaky_relu(x): - return tf.maximum(0.1*x,x) + return tf.maximum(0.1*x,x) diff --git a/src/generative/models/dfc_vae_resnet.py b/src/generative/models/dfc_vae_resnet.py index 7c2f52c6c..fdd1e144e 100644 --- a/src/generative/models/dfc_vae_resnet.py +++ b/src/generative/models/dfc_vae_resnet.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2017 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -"""Variational autoencoder based on the paper +"""Variational autoencoder based on the paper 'Deep Feature Consistent Variational Autoencoder' (https://arxiv.org/pdf/1610.00291.pdf) """ @@ -29,16 +29,17 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import tensorflow.contrib.slim as slim import generative.models.vae_base # @UnresolvedImport class Vae(generative.models.vae_base.Vae): - + def __init__(self, latent_variable_dim): super(Vae, self).__init__(latent_variable_dim, 64) - + def encoder(self, images, is_training): activation_fn = leaky_relu # tf.nn.relu weight_decay = 0.0 @@ -51,10 +52,10 @@ def encoder(self, images, is_training): normalizer_fn=slim.batch_norm, normalizer_params=self.batch_norm_params): net = images - + net = slim.conv2d(net, 32, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_1a') net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_1b') - + net = slim.conv2d(net, 64, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_2a') net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_2b') @@ -63,15 +64,15 @@ def encoder(self, images, is_training): net = slim.conv2d(net, 256, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_4a') net = slim.repeat(net, 3, conv2d_block, 0.1, 256, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_4b') - + net = slim.flatten(net) fc1 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_1') fc2 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_2') return fc1, fc2 - + def decoder(self, latent_var, is_training): activation_fn = leaky_relu # tf.nn.relu - weight_decay = 0.0 + weight_decay = 0.0 with tf.variable_scope('decoder'): with slim.arg_scope([slim.batch_norm], is_training=is_training): @@ -82,29 +83,28 @@ def decoder(self, latent_var, is_training): normalizer_params=self.batch_norm_params): net = slim.fully_connected(latent_var, 4096, activation_fn=None, normalizer_fn=None, scope='Fc_1') net = tf.reshape(net, [-1,4,4,256], name='Reshape') - + net = tf.image.resize_nearest_neighbor(net, size=(8,8), name='Upsample_1') net = slim.conv2d(net, 128, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_1a') net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_1b') - + net = tf.image.resize_nearest_neighbor(net, size=(16,16), name='Upsample_2') net = slim.conv2d(net, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2a') net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2b') - + net = tf.image.resize_nearest_neighbor(net, size=(32,32), name='Upsample_3') net = slim.conv2d(net, 32, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_3a') net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_3b') - + net = tf.image.resize_nearest_neighbor(net, size=(64,64), name='Upsample_4') net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_4a') net = slim.repeat(net, 3, conv2d_block, 0.1, 3, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_4b') net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=None, 
scope='Conv2d_4c') - + return net - + def conv2d_block(inp, scale, *args, **kwargs): return inp + slim.conv2d(inp, *args, **kwargs) * scale def leaky_relu(x): return tf.maximum(0.1*x,x) - \ No newline at end of file diff --git a/src/generative/models/vae_base.py b/src/generative/models/vae_base.py index 7437251de..2ceb2e44f 100644 --- a/src/generative/models/vae_base.py +++ b/src/generative/models/vae_base.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2017 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -27,10 +27,11 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() class Vae(object): - + def __init__(self, latent_variable_dim, image_size): self.latent_variable_dim = latent_variable_dim self.image_size = image_size @@ -44,15 +45,14 @@ def __init__(self, latent_variable_dim, image_size): # Moving averages ends up in the trainable variables collection 'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ], } - + def encoder(self, images, is_training): # Must be overridden in implementation classes raise NotImplementedError - + def decoder(self, latent_var, is_training): # Must be overridden in implementation classes raise NotImplementedError def get_image_size(self): return self.image_size - \ No newline at end of file diff --git a/src/generative/modify_attribute.py b/src/generative/modify_attribute.py index 8187cff47..ba137779c 100644 --- a/src/generative/modify_attribute.py +++ b/src/generative/modify_attribute.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2017 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
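conv2d_block above is a residual unit with a damping factor: the output is inp + scale * conv(inp), and slim.repeat stacks three of them per resolution with scale=0.1. The same idea in plain numpy, with an arbitrary function standing in for the convolution:

    import numpy as np

    def scaled_residual(x, f, scale=0.1):
        # out = x + scale * f(x); the small scale keeps each block near identity
        return x + scale * f(x)

    x = np.ones(4)
    print(scaled_residual(x, lambda v: 2.0 * v))   # [1.2 1.2 1.2 1.2]
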
IN NO EVENT SHALL THE @@ -28,7 +28,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import sys import argparse import importlib @@ -40,41 +41,41 @@ from scipy import misc def main(args): - + img_mean = np.array([134.10714722, 102.52040863, 87.15436554]) img_stddev = np.sqrt(np.array([3941.30175781, 2856.94287109, 2519.35791016])) - + vae_def = importlib.import_module(args.vae_def) vae = vae_def.Vae(args.latent_var_size) gen_image_size = vae.get_image_size() with tf.Graph().as_default(): tf.set_random_seed(args.seed) - + images = tf.placeholder(tf.float32, shape=(None,gen_image_size,gen_image_size,3), name='input') - + # Normalize images_norm = (images-img_mean) / img_stddev - # Resize to appropriate size for the encoder + # Resize to appropriate size for the encoder images_norm_resize = tf.image.resize_images(images_norm, (gen_image_size,gen_image_size)) - + # Create encoder network mean, log_variance = vae.encoder(images_norm_resize, True) - + epsilon = tf.random_normal((tf.shape(mean)[0], args.latent_var_size)) std = tf.exp(log_variance/2) latent_var = mean + epsilon * std - + # Create decoder reconstructed_norm = vae.decoder(latent_var, False) - + # Un-normalize reconstructed = (reconstructed_norm*img_stddev) + img_mean # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) - + # Start running operations on the Graph gpu_memory_fraction = 1.0 gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction) @@ -83,14 +84,14 @@ def main(args): sess.run(tf.local_variables_initializer()) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) - + with sess.as_default(): - + vae_checkpoint = os.path.expanduser(args.vae_checkpoint) print('Restoring VAE checkpoint: %s' % vae_checkpoint) saver.restore(sess, vae_checkpoint) - + filename = os.path.expanduser(args.attributes_filename) with h5py.File(filename,'r') as f: latent_vars = np.array(f.get('latent_vars')) @@ -109,19 +110,19 @@ def main(args): idx = np.argwhere(attributes[:,attribute_index]==-1)[image_index,0] for i in range(nrof_interp_steps): sweep_latent_var[i+nrof_interp_steps*j,:] = latent_vars[idx,:] + 5.0*i/nrof_interp_steps*attribute_vectors[attribute_index,:] - + recon = sess.run(reconstructed, feed_dict={latent_var:sweep_latent_var}) - + img = facenet.put_images_on_grid(recon, shape=(nrof_interp_steps*2,int(math.ceil(nrof_images/2)))) - + image_filename = os.path.expanduser(args.output_image_filename) print('Writing generated image to %s' % image_filename) misc.imsave(image_filename, img) - + def parse_arguments(argv): parser = argparse.ArgumentParser() - + parser.add_argument('vae_def', type=str, help='Model definition for the variational autoencoder. 
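modify_attribute.py sweeps a stored latent code along a learned attribute direction: for each of nrof_interp_steps steps it adds 5.0*i/nrof_interp_steps times the attribute vector before decoding, interpolating from the original face toward the attribute. A sketch of that arithmetic (attribute_sweep and the toy shapes below are illustrative, not part of the repo):

    import numpy as np

    def attribute_sweep(z, v_attr, nrof_interp_steps=10, max_offset=5.0):
        # rows: z, z + d*v, z + 2d*v, ... with d = max_offset / nrof_interp_steps
        return np.stack([z + max_offset * i / nrof_interp_steps * v_attr
                         for i in range(nrof_interp_steps)])

    z = np.zeros(100)                    # latent code of one image
    v = np.random.randn(100)             # attribute direction, e.g. 'smiling'
    print(attribute_sweep(z, v).shape)   # (10, 100), decoded row by row
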
Points to a module containing the definition.') parser.add_argument('vae_checkpoint', type=str, @@ -136,7 +137,7 @@ def parse_arguments(argv): help='Random seed.', default=666) return parser.parse_args(argv) - - + + if __name__ == '__main__': main(parse_arguments(sys.argv[1:])) diff --git a/src/generative/train_vae.py b/src/generative/train_vae.py index c3c882fab..261c05b87 100644 --- a/src/generative/train_vae.py +++ b/src/generative/train_vae.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2017 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -26,7 +26,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import tensorflow.contrib.slim as slim import sys import time @@ -41,10 +42,10 @@ from six import iteritems def main(args): - + img_mean = np.array([134.10714722, 102.52040863, 87.15436554]) img_stddev = np.sqrt(np.array([3941.30175781, 2856.94287109, 2519.35791016])) - + vae_def = importlib.import_module(args.vae_def) vae = vae_def.Vae(args.latent_var_size) gen_image_size = vae.get_image_size() @@ -54,24 +55,24 @@ def main(args): if not os.path.isdir(model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) log_file_name = os.path.join(model_dir, 'logs.h5') - + # Write arguments to a text file facenet.write_arguments_to_file(args, os.path.join(model_dir, 'arguments.txt')) - + # Store some git revision info in a text file in the log directory src_path,_ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, model_dir, ' '.join(sys.argv)) - + with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) - + train_set = facenet.get_dataset(args.data_dir) image_list, _ = facenet.get_image_paths_and_labels(train_set) - + # Create the input queue input_queue = tf.train.string_input_producer(image_list, shuffle=True) - + nrof_preprocess_threads = 4 image_per_thread = [] for _ in range(nrof_preprocess_threads): @@ -82,31 +83,31 @@ def main(args): image = tf.cast(image, tf.float32) #pylint: disable=no-member image_per_thread.append([image]) - + images = tf.train.batch_join( image_per_thread, batch_size=args.batch_size, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=False) - + # Normalize images_norm = (images-img_mean) / img_stddev - # Resize to appropriate size for the encoder + # Resize to appropriate size for the encoder images_norm_resize = tf.image.resize_images(images_norm, (gen_image_size,gen_image_size)) - + # Create encoder network mean, log_variance = vae.encoder(images_norm_resize, True) - + epsilon = tf.random_normal((tf.shape(mean)[0], args.latent_var_size)) 
std = tf.exp(log_variance/2) latent_var = mean + epsilon * std - + # Create decoder network reconstructed_norm = vae.decoder(latent_var, True) - + # Un-normalize reconstructed = (reconstructed_norm*img_stddev) + img_mean - + # Create reconstruction loss if args.reconstruction_loss_type=='PLAIN': images_resize = tf.image.resize_images(images, (gen_image_size,gen_image_size)) @@ -116,10 +117,10 @@ def main(args): reconstructed_norm_resize = tf.image.resize_images(reconstructed_norm, (args.input_image_size,args.input_image_size)) - # Stack images from both the input batch and the reconstructed batch in a new tensor + # Stack images from both the input batch and the reconstructed batch in a new tensor shp = [-1] + images_norm.get_shape().as_list()[1:] input_images = tf.reshape(tf.stack([images_norm, reconstructed_norm_resize], axis=0), shp) - _, end_points = network.inference(input_images, 1.0, + _, end_points = network.inference(input_images, 1.0, phase_train=False, bottleneck_layer_size=128, weight_decay=0.0) # Get a list of feature names to use for loss terms @@ -136,20 +137,20 @@ def main(args): reconstruction_loss = tf.add_n(reconstruction_loss_list, 'reconstruction_loss') else: pass - + # Create KL divergence loss kl_loss = kl_divergence_loss(mean, log_variance) kl_loss_mean = tf.reduce_mean(kl_loss) - + total_loss = args.alfa*kl_loss_mean + args.beta*reconstruction_loss - + learning_rate = tf.train.exponential_decay(args.initial_learning_rate, global_step, args.learning_rate_decay_steps, args.learning_rate_decay_factor, staircase=True) - + # Calculate gradients and make sure not to include parameters for the perceptual loss model opt = tf.train.AdamOptimizer(learning_rate) grads = opt.compute_gradients(total_loss, var_list=get_variables_to_train()) - + # Apply gradients apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) with tf.control_dependencies([apply_gradient_op]): @@ -157,7 +158,7 @@ def main(args): # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) - + facenet_saver = tf.train.Saver(get_facenet_variables_to_restore()) # Start running operations on the Graph @@ -170,21 +171,21 @@ def main(args): tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): - + if args.reconstruction_loss_type=='PERCEPTUAL': if not args.pretrained_model: raise ValueError('A pretrained model must be specified when using perceptual loss') pretrained_model_exp = os.path.expanduser(args.pretrained_model) print('Restoring pretrained model: %s' % pretrained_model_exp) facenet_saver.restore(sess, pretrained_model_exp) - + log = { 'total_loss': np.zeros((0,), np.float), 'reconstruction_loss': np.zeros((0,), np.float), 'kl_loss': np.zeros((0,), np.float), 'learning_rate': np.zeros((0,), np.float), } - + step = 0 print('Running training') while step < args.max_nrof_steps: @@ -237,7 +238,7 @@ def kl_divergence_loss(mean, log_variance): def parse_arguments(argv): parser = argparse.ArgumentParser() - + parser.add_argument('vae_def', type=str, help='Model definition for the variational autoencoder. Points to a module containing the definition.') parser.add_argument('data_dir', type=str, @@ -260,7 +261,7 @@ def parse_arguments(argv): parser.add_argument('--batch_size', type=int, help='Number of images to process in a batch.', default=128) parser.add_argument('--input_image_size', type=int, - help='Image size of input images (height, width) in pixels. If perceptual loss is used this ' + help='Image size of input images (height, width) in pixels. 
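The epsilon/std/latent_var lines above are the reparameterization trick: with the encoder emitting a mean and a log-variance, a latent sample is z = mu + eps * exp(log_var / 2) for eps ~ N(0, I), which keeps the sampling step differentiable. kl_divergence_loss (defined further down in train_vae.py, outside these hunks) penalizes the divergence of that distribution from the unit Gaussian; the standard closed form is shown in the numpy sketch below:

    import numpy as np

    rng = np.random.default_rng(0)
    mean = rng.normal(size=(8, 100))           # encoder output fc1
    log_variance = rng.normal(size=(8, 100))   # encoder output fc2

    # z = mu + eps * sigma, with sigma = exp(log_var / 2)
    epsilon = rng.normal(size=mean.shape)
    latent_var = mean + epsilon * np.exp(log_variance / 2)

    # KL(N(mu, sigma^2) || N(0, 1)), summed over the latent dimensions
    kl = -0.5 * np.sum(1 + log_variance - mean**2 - np.exp(log_variance), axis=1)
    print(latent_var.shape, kl.shape)   # (8, 100) (8,)
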
If perceptual loss is used this ' + 'should be the input image size for the perceptual loss model', default=160) parser.add_argument('--latent_var_size', type=int, help='Dimensionality of the latent variable.', default=100) @@ -276,9 +277,9 @@ def parse_arguments(argv): help='Kullback-Leibler divergence loss factor.', default=1.0) parser.add_argument('--beta', type=float, help='Reconstruction loss factor.', default=0.5) - + return parser.parse_args(argv) - - + + if __name__ == '__main__': main(parse_arguments(sys.argv[1:])) diff --git a/src/models/dummy.py b/src/models/dummy.py index 7afe1ef59..33f5b0699 100644 --- a/src/models/dummy.py +++ b/src/models/dummy.py @@ -1,19 +1,19 @@ """Dummy model used only for testing """ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -26,10 +26,11 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import tensorflow.contrib.slim as slim import numpy as np - + def inference(images, keep_probability, phase_train=True, # @UnusedVariable bottleneck_layer_size=128, bottleneck_layer_activation=None, weight_decay=0.0, reuse=None): # @UnusedVariable batch_norm_params = { @@ -42,13 +43,13 @@ def inference(images, keep_probability, phase_train=True, # @UnusedVariable # Moving averages ends up in the trainable variables collection 'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ], } - + with slim.arg_scope([slim.conv2d, slim.fully_connected], weights_initializer=tf.truncated_normal_initializer(stddev=0.1), weights_regularizer=slim.l2_regularizer(weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params): size = np.prod(images.get_shape()[1:].as_list()) - net = slim.fully_connected(tf.reshape(images, (-1,size)), bottleneck_layer_size, activation_fn=None, + net = slim.fully_connected(tf.reshape(images, (-1,size)), bottleneck_layer_size, activation_fn=None, scope='Bottleneck', reuse=False) return net, None diff --git a/src/models/inception_resnet_v1.py b/src/models/inception_resnet_v1.py index 475e81bb4..297839f61 100644 --- a/src/models/inception_resnet_v1.py +++ b/src/models/inception_resnet_v1.py @@ -23,7 +23,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import tensorflow.contrib.slim as slim # Inception-Resnet-A @@ -87,7 +88,7 @@ def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): if activation_fn: net = activation_fn(net) return net - + def reduction_a(net, k, l, m, n): with tf.variable_scope('Branch_0'): tower_conv = slim.conv2d(net, n, 3, stride=2, padding='VALID', 
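For the PERCEPTUAL reconstruction loss, the train_vae.py hunk above runs originals and reconstructions through the feature network in a single forward pass: tf.stack([images_norm, reconstructed_norm_resize], axis=0) followed by the reshape concatenates the two batches, so matching feature maps can be compared afterwards. The reshape semantics in numpy (shapes are illustrative):

    import numpy as np

    batch, h, w, c = 16, 160, 160, 3
    images = np.zeros((batch, h, w, c), np.float32)   # input batch
    recon = np.ones((batch, h, w, c), np.float32)     # reconstructions

    # (2, 16, H, W, C) -> (32, H, W, C): all originals first, then all recons
    stacked = np.stack([images, recon], axis=0).reshape(-1, h, w, c)
    print(stacked.shape, stacked[:batch].max(), stacked[batch:].min())  # (32, ...) 0.0 1.0
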
@@ -126,8 +127,8 @@ def reduction_b(net): net = tf.concat([tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3) return net - -def inference(images, keep_probability, phase_train=True, + +def inference(images, keep_probability, phase_train=True, bottleneck_layer_size=128, weight_decay=0.0, reuse=None): batch_norm_params = { # Decay for the moving averages. @@ -139,9 +140,9 @@ def inference(images, keep_probability, phase_train=True, # Moving averages ends up in the trainable variables collection 'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ], } - + with slim.arg_scope([slim.conv2d, slim.fully_connected], - weights_initializer=slim.initializers.xavier_initializer(), + weights_initializer=slim.initializers.xavier_initializer(), weights_regularizer=slim.l2_regularizer(weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params): @@ -152,7 +153,7 @@ def inference(images, keep_probability, phase_train=True, def inception_resnet_v1(inputs, is_training=True, dropout_keep_prob=0.8, bottleneck_layer_size=128, - reuse=None, + reuse=None, scope='InceptionResnetV1'): """Creates the Inception Resnet V1 model. Args: @@ -168,13 +169,13 @@ def inception_resnet_v1(inputs, is_training=True, end_points: the set of end_points from the inception model. """ end_points = {} - + with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse): with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): - + # 149 x 149 x 32 net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') @@ -202,45 +203,45 @@ def inception_resnet_v1(inputs, is_training=True, net = slim.conv2d(net, 256, 3, stride=2, padding='VALID', scope='Conv2d_4b_3x3') end_points['Conv2d_4b_3x3'] = net - + # 5 x Inception-resnet-A net = slim.repeat(net, 5, block35, scale=0.17) end_points['Mixed_5a'] = net - + # Reduction-A with tf.variable_scope('Mixed_6a'): net = reduction_a(net, 192, 192, 256, 384) end_points['Mixed_6a'] = net - + # 10 x Inception-Resnet-B net = slim.repeat(net, 10, block17, scale=0.10) end_points['Mixed_6b'] = net - + # Reduction-B with tf.variable_scope('Mixed_7a'): net = reduction_b(net) end_points['Mixed_7a'] = net - + # 5 x Inception-Resnet-C net = slim.repeat(net, 5, block8, scale=0.20) end_points['Mixed_8a'] = net - + net = block8(net, activation_fn=None) end_points['Mixed_8b'] = net - + with tf.variable_scope('Logits'): end_points['PrePool'] = net #pylint: disable=no-member net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', scope='AvgPool_1a_8x8') net = slim.flatten(net) - + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='Dropout') - + end_points['PreLogitsFlatten'] = net - - net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, + + net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, scope='Bottleneck', reuse=False) - + return net, end_points diff --git a/src/models/inception_resnet_v2.py b/src/models/inception_resnet_v2.py index 0fb176fd0..d8fc89085 100644 --- a/src/models/inception_resnet_v2.py +++ b/src/models/inception_resnet_v2.py @@ -23,7 +23,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import tensorflow.contrib.slim as slim # Inception-Resnet-A @@ -87,8 +88,8 @@ def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, 
reuse=None): if activation_fn: net = activation_fn(net) return net - -def inference(images, keep_probability, phase_train=True, + +def inference(images, keep_probability, phase_train=True, bottleneck_layer_size=128, weight_decay=0.0, reuse=None): batch_norm_params = { # Decay for the moving averages. @@ -101,7 +102,7 @@ def inference(images, keep_probability, phase_train=True, 'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ], } with slim.arg_scope([slim.conv2d, slim.fully_connected], - weights_initializer=slim.initializers.xavier_initializer(), + weights_initializer=slim.initializers.xavier_initializer(), weights_regularizer=slim.l2_regularizer(weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params): @@ -128,13 +129,13 @@ def inception_resnet_v2(inputs, is_training=True, end_points: the set of end_points from the inception model. """ end_points = {} - + with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse): with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): - + # 149 x 149 x 32 net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') @@ -162,7 +163,7 @@ def inception_resnet_v2(inputs, is_training=True, net = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_5a_3x3') end_points['MaxPool_5a_3x3'] = net - + # 35 x 35 x 320 with tf.variable_scope('Mixed_5b'): with tf.variable_scope('Branch_0'): @@ -184,10 +185,10 @@ def inception_resnet_v2(inputs, is_training=True, scope='Conv2d_0b_1x1') net = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3) - + end_points['Mixed_5b'] = net net = slim.repeat(net, 10, block35, scale=0.17) - + # 17 x 17 x 1024 with tf.variable_scope('Mixed_6a'): with tf.variable_scope('Branch_0'): @@ -204,10 +205,10 @@ def inception_resnet_v2(inputs, is_training=True, tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_1a_3x3') net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3) - + end_points['Mixed_6a'] = net net = slim.repeat(net, 20, block17, scale=0.10) - + with tf.variable_scope('Mixed_7a'): with tf.variable_scope('Branch_0'): tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') @@ -228,28 +229,28 @@ def inception_resnet_v2(inputs, is_training=True, scope='MaxPool_1a_3x3') net = tf.concat([tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3) - + end_points['Mixed_7a'] = net - + net = slim.repeat(net, 9, block8, scale=0.20) net = block8(net, activation_fn=None) - + net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') end_points['Conv2d_7b_1x1'] = net - + with tf.variable_scope('Logits'): end_points['PrePool'] = net #pylint: disable=no-member net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', scope='AvgPool_1a_8x8') net = slim.flatten(net) - + net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='Dropout') - + end_points['PreLogitsFlatten'] = net - - net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, + + net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, scope='Bottleneck', reuse=False) - + return net, end_points diff --git a/src/models/squeezenet.py b/src/models/squeezenet.py index ae117e1a6..efd02c210 100644 --- a/src/models/squeezenet.py +++ b/src/models/squeezenet.py @@ -2,7 +2,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import 
tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import tensorflow.contrib.slim as slim def fire_module(inputs, @@ -62,6 +63,6 @@ def inference(images, keep_probability, phase_train=True, bottleneck_layer_size= net = slim.conv2d(net, 1000, [1, 1], activation_fn=None, normalizer_fn=None, scope='conv10') net = slim.avg_pool2d(net, net.get_shape()[1:3], scope='avgpool10') net = tf.squeeze(net, [1, 2], name='logits') - net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, + net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, scope='Bottleneck', reuse=False) return net, None diff --git a/src/train_softmax.py b/src/train_softmax.py index 6b0b28b58..69311560b 100644 --- a/src/train_softmax.py +++ b/src/train_softmax.py @@ -1,19 +1,19 @@ """Training a face recognizer with TensorFlow using softmax cross entropy loss """ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -31,7 +31,8 @@ import time import sys import random -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import importlib import argparse @@ -45,7 +46,7 @@ from tensorflow.python.ops import array_ops def main(args): - + network = importlib.import_module(args.model_def) image_size = (args.image_size, args.image_size) @@ -61,7 +62,7 @@ def main(args): # Write arguments to a text file facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) - + # Store some git revision info in a text file in the log directory src_path,_ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) @@ -70,55 +71,55 @@ def main(args): random.seed(args.seed) dataset = facenet.get_dataset(args.data_dir) if args.filter_filename: - dataset = filter_dataset(dataset, os.path.expanduser(args.filter_filename), + dataset = filter_dataset(dataset, os.path.expanduser(args.filter_filename), args.filter_percentile, args.filter_min_nrof_images_per_class) - + if args.validation_set_split_ratio>0.0: train_set, val_set = facenet.split_dataset(dataset, args.validation_set_split_ratio, args.min_nrof_val_images_per_class, 'SPLIT_IMAGES') else: train_set, val_set = dataset, [] - + nrof_classes = len(train_set) - + print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) pretrained_model = None if args.pretrained_model: pretrained_model = os.path.expanduser(args.pretrained_model) print('Pre-trained model: %s' % pretrained_model) - + if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, actual_issame = 
lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) - + with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) - + # Get a list of image paths and their labels image_list, label_list = facenet.get_image_paths_and_labels(train_set) assert len(image_list)>0, 'The training set should not be empty' - + val_image_list, val_label_list = facenet.get_image_paths_and_labels(val_set) - # Create a queue that produces indices into the image_list and label_list + # Create a queue that produces indices into the image_list and label_list labels = ops.convert_to_tensor(label_list, dtype=tf.int32) range_size = array_ops.shape(labels)[0] index_queue = tf.train.range_input_producer(range_size, num_epochs=None, shuffle=True, seed=None, capacity=32) - + index_dequeue_op = index_queue.dequeue_many(args.batch_size*args.epoch_size, 'index_dequeue') - + learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths') labels_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='labels') control_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='control') - + nrof_preprocess_threads = 4 input_queue = data_flow_ops.FIFOQueue(capacity=2000000, dtypes=[tf.string, tf.int32, tf.int32], @@ -130,21 +131,21 @@ def main(args): image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') - + print('Number of classes in training set: %d' % nrof_classes) print('Number of examples in training set: %d' % len(image_list)) print('Number of classes in validation set: %d' % len(val_set)) print('Number of examples in validation set: %d' % len(val_image_list)) - + print('Building training graph') - + # Build the inference graph - prelogits, _ = network.inference(image_batch, args.keep_probability, - phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, + prelogits, _ = network.inference(image_batch, args.keep_probability, + phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) - logits = slim.fully_connected(prelogits, len(train_set), activation_fn=None, - weights_initializer=slim.initializers.xavier_initializer(), + logits = slim.fully_connected(prelogits, len(train_set), activation_fn=None, + weights_initializer=slim.initializers.xavier_initializer(), weights_regularizer=slim.l2_regularizer(args.weight_decay), scope='Logits', reuse=False) @@ -168,18 +169,18 @@ def main(args): labels=label_batch, logits=logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') tf.add_to_collection('losses', cross_entropy_mean) - + correct_prediction = tf.cast(tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)), tf.float32) accuracy = tf.reduce_mean(correct_prediction) - + # Calculate the total losses regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters - train_op = facenet.train(total_loss, global_step, args.optimizer, + train_op = facenet.train(total_loss, global_step, args.optimizer, 
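train_softmax.py attaches a classifier head for training: prelogits feed a fully connected 'Logits' layer with one output per training class, sparse softmax cross entropy is averaged over the batch, and the collected L2 regularization terms are added on top to form total_loss. The loss arithmetic in numpy (a sketch, not the TF op):

    import numpy as np

    def sparse_softmax_cross_entropy(logits, labels):
        # per-example -log p(label), with the usual max-shift for stability
        shifted = logits - logits.max(axis=1, keepdims=True)
        log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
        return -log_probs[np.arange(len(labels)), labels]

    logits = np.array([[2.0, 0.5, -1.0], [0.1, 0.2, 0.3]])
    labels = np.array([0, 2])
    cross_entropy_mean = sparse_softmax_cross_entropy(logits, labels).mean()
    regularization_losses = [0.01, 0.02]   # stand-ins for the TF collection
    total_loss = cross_entropy_mean + np.sum(regularization_losses)
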
learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms) - + # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) @@ -228,19 +229,19 @@ def main(args): # Train for one epoch t = time.time() cont = train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, - learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, global_step, + learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, global_step, total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file, stat, cross_entropy_mean, accuracy, learning_rate, prelogits, prelogits_center_loss, args.random_rotate, args.random_crop, args.random_flip, prelogits_norm, args.prelogits_hist_max, args.use_fixed_image_standardization) stat['time_train'][epoch-1] = time.time() - t - + if not cont: break - + t = time.time() if len(val_image_list)>0 and ((epoch-1) % args.validate_every_n_epochs == args.validate_every_n_epochs-1 or epoch==args.max_nrof_epochs): validate(args, sess, epoch, val_image_list, val_label_list, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, - phase_train_placeholder, batch_size_placeholder, + phase_train_placeholder, batch_size_placeholder, stat, total_loss, regularization_losses, cross_entropy_mean, accuracy, args.validate_every_n_epochs, args.use_fixed_image_standardization) stat['time_validate'][epoch-1] = time.time() - t @@ -250,8 +251,8 @@ def main(args): # Evaluate on LFW t = time.time() if args.lfw_dir: - evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, - embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, stat, epoch, + evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, + embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, stat, epoch, args.lfw_distance_metric, args.lfw_subtract_mean, args.lfw_use_flipped_images, args.use_fixed_image_standardization) stat['time_evaluate'][epoch-1] = time.time() - t @@ -259,9 +260,9 @@ def main(args): with h5py.File(stat_file_name, 'w') as f: for key, value in stat.iteritems(): f.create_dataset(key, data=value) - + return model_dir - + def find_threshold(var, percentile): hist, bin_edges = np.histogram(var, 100) cdf = np.float32(np.cumsum(hist)) / np.sum(hist) @@ -269,7 +270,7 @@ def find_threshold(var, percentile): #plt.plot(bin_centers, cdf) threshold = np.interp(percentile*0.01, cdf, bin_centers) return threshold - + def filter_dataset(dataset, data_filename, percentile, min_nrof_images_per_class): with h5py.File(data_filename,'r') as f: distance_to_center = np.array(f.get('distance_to_center')) @@ -292,26 +293,26 @@ def filter_dataset(dataset, data_filename, percentile, min_nrof_images_per_class del(filtered_dataset[i]) return filtered_dataset - -def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, - learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, step, - loss, train_op, summary_op, summary_writer, reg_losses, learning_rate_schedule_file, - stat, cross_entropy_mean, 
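One pre-existing wrinkle in the hunk above: the statistics dump uses stat.iteritems(), which is Python 2 only. Under the Python 3 interpreters that TensorFlow 2.x requires, that line raises AttributeError; dict.items() is the equivalent call. A standalone version of the dump with that spelling (the stat dict here is a toy stand-in):

    import h5py
    import numpy as np

    stat = {'loss': np.zeros(10), 'accuracy': np.zeros(10)}
    with h5py.File('stat.h5', 'w') as f:
        for key, value in stat.items():   # .iteritems() does not exist in Python 3
            f.create_dataset(key, data=value)
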
accuracy, + +def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, + learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, step, + loss, train_op, summary_op, summary_writer, reg_losses, learning_rate_schedule_file, + stat, cross_entropy_mean, accuracy, learning_rate, prelogits, prelogits_center_loss, random_rotate, random_crop, random_flip, prelogits_norm, prelogits_hist_max, use_fixed_image_standardization): batch_number = 0 - + if args.learning_rate>0.0: lr = args.learning_rate else: lr = facenet.get_learning_rate_from_file(learning_rate_schedule_file, epoch) - + if lr<=0: - return False + return False index_epoch = sess.run(index_dequeue_op) label_epoch = np.array(label_list)[index_epoch] image_epoch = np.array(image_list)[index_epoch] - + # Enqueue one epoch of image paths and labels labels_array = np.expand_dims(np.array(label_epoch),1) image_paths_array = np.expand_dims(np.array(image_epoch),1) @@ -330,7 +331,7 @@ def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_o summary_writer.add_summary(summary_str, global_step=step_) else: loss_, _, step_, reg_losses_, prelogits_, cross_entropy_mean_, lr_, prelogits_norm_, accuracy_, center_loss_ = sess.run(tensor_list, feed_dict=feed_dict) - + duration = time.time() - start_time stat['loss'][step_-1] = loss_ stat['center_loss'][step_-1] = center_loss_ @@ -340,7 +341,7 @@ def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_o stat['learning_rate'][epoch-1] = lr_ stat['accuracy'][step_-1] = accuracy_ stat['prelogits_hist'][epoch-1,:] += np.histogram(np.minimum(np.abs(prelogits_), prelogits_hist_max), bins=1000, range=(0.0, prelogits_hist_max))[0] - + duration = time.time() - start_time print('Epoch: [%d][%d/%d]\tTime %.3f\tLoss %2.3f\tXent %2.3f\tRegLoss %2.3f\tAccuracy %2.3f\tLr %2.5f\tCl %2.3f' % (epoch, batch_number+1, args.epoch_size, duration, loss_, cross_entropy_mean_, np.sum(reg_losses_), accuracy_, lr_, center_loss_)) @@ -354,14 +355,14 @@ def train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_o return True def validate(args, sess, epoch, image_list, label_list, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, - phase_train_placeholder, batch_size_placeholder, + phase_train_placeholder, batch_size_placeholder, stat, loss, regularization_losses, cross_entropy_mean, accuracy, validate_every_n_epochs, use_fixed_image_standardization): - + print('Running forward pass on validation set') nrof_batches = len(label_list) // args.lfw_batch_size nrof_images = nrof_batches * args.lfw_batch_size - + # Enqueue one epoch of image paths and labels labels_array = np.expand_dims(np.array(label_list[:nrof_images]),1) image_paths_array = np.expand_dims(np.array(image_list[:nrof_images]),1) @@ -394,12 +395,12 @@ def validate(args, sess, epoch, image_list, label_list, enqueue_op, image_paths_ (epoch, duration, np.mean(loss_array), np.mean(xent_array), np.mean(accuracy_array))) -def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, +def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, log_dir, step, summary_writer, stat, epoch, distance_metric, subtract_mean, use_flipped_images, 
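When --learning_rate is negative, train() above looks the rate up in the schedule file instead, and a scheduled rate <= 0 ends training (the `return False`). The file maps epoch thresholds to rates; a sketch of the lookup semantics (the parser and the example values below are illustrative, not the facenet.get_learning_rate_from_file source):

    # learning_rate_schedule.txt, e.g.:
    #   0:  0.05
    #   60: 0.005
    #   80: 0.0005
    #   91: -1      <- a negative rate stops training

    def get_rate(schedule, epoch):
        # schedule: (start_epoch, rate) pairs sorted by start_epoch
        rate = -1.0
        for start_epoch, r in schedule:
            if start_epoch <= epoch:
                rate = r
            else:
                break
        return rate

    schedule = [(0, 0.05), (60, 0.005), (80, 0.0005), (91, -1.0)]
    print(get_rate(schedule, 75))   # 0.005
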
use_fixed_image_standardization): start_time = time.time() # Run forward pass to calculate embeddings print('Running forward pass on LFW images') - + # Enqueue one epoch of image paths and labels nrof_embeddings = len(actual_issame)*2 # nrof_pairs * nrof_images_per_pair nrof_flips = 2 if use_flipped_images else 1 @@ -413,7 +414,7 @@ def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phas # Flip every second image control_array += (labels_array % 2)*facenet.FLIP sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, control_placeholder: control_array}) - + embedding_size = int(embeddings.get_shape()[1]) assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size' nrof_batches = nrof_images // batch_size @@ -438,7 +439,7 @@ def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phas assert np.array_equal(lab_array, np.arange(nrof_images))==True, 'Wrong labels used for evaluation, possibly caused by training examples left in the input pipeline' _, _, accuracy, val, val_std, far = lfw.evaluate(embeddings, actual_issame, nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) - + print('Accuracy: %2.5f+-%2.5f' % (np.mean(accuracy), np.std(accuracy))) print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far)) lfw_time = time.time() - start_time @@ -463,7 +464,7 @@ def save_variables_and_metagraph(sess, saver, summary_writer, model_dir, model_n save_time_variables = time.time() - start_time print('Variables saved in %.2f seconds' % save_time_variables) metagraph_filename = os.path.join(model_dir, 'model-%s.meta' % model_name) - save_time_metagraph = 0 + save_time_metagraph = 0 if not os.path.exists(metagraph_filename): print('Saving metagraph') start_time = time.time() @@ -475,12 +476,12 @@ def save_variables_and_metagraph(sess, saver, summary_writer, model_dir, model_n summary.value.add(tag='time/save_variables', simple_value=save_time_variables) summary.value.add(tag='time/save_metagraph', simple_value=save_time_metagraph) summary_writer.add_summary(summary, step) - + def parse_arguments(argv): parser = argparse.ArgumentParser() - - parser.add_argument('--logs_base_dir', type=str, + + parser.add_argument('--logs_base_dir', type=str, help='Directory where to write event logs.', default='~/logs/facenet') parser.add_argument('--models_base_dir', type=str, help='Directory where to write trained models and checkpoints.', default='~/models/facenet') @@ -503,14 +504,14 @@ def parse_arguments(argv): help='Number of batches per epoch.', default=1000) parser.add_argument('--embedding_size', type=int, help='Dimensionality of the embedding.', default=128) - parser.add_argument('--random_crop', + parser.add_argument('--random_crop', help='Performs random cropping of training images. If false, the center image_size pixels from the training images are used.
' + 'If the size of the images in the data directory is equal to image_size no cropping is performed', action='store_true') - parser.add_argument('--random_flip', + parser.add_argument('--random_flip', help='Performs random horizontal flipping of training images.', action='store_true') - parser.add_argument('--random_rotate', + parser.add_argument('--random_rotate', help='Performs random rotations of training images.', action='store_true') - parser.add_argument('--use_fixed_image_standardization', + parser.add_argument('--use_fixed_image_standardization', help='Performs fixed standardization of images.', action='store_true') parser.add_argument('--keep_probability', type=float, help='Keep probability of dropout for the fully connected layer(s).', default=1.0) @@ -541,7 +542,7 @@ def parse_arguments(argv): help='Random seed.', default=666) parser.add_argument('--nrof_preprocess_threads', type=int, help='Number of preprocessing (data loading and augmentation) threads.', default=4) - parser.add_argument('--log_histograms', + parser.add_argument('--log_histograms', help='Enables logging of weight/bias histograms in tensorboard.', action='store_true') parser.add_argument('--learning_rate_schedule_file', type=str, help='File containing the learning rate schedule that is used when learning_rate is set to -1.', default='data/learning_rate_schedule.txt') @@ -557,7 +558,7 @@ def parse_arguments(argv): help='The ratio of the total dataset to use for validation', default=0.0) parser.add_argument('--min_nrof_val_images_per_class', type=float, help='Classes with fewer images will be removed from the validation set', default=0) - + # Parameters for validation on LFW parser.add_argument('--lfw_pairs', type=str, help='The file containing the pairs to use for validation.', default='data/pairs.txt') @@ -569,12 +570,12 @@ def parse_arguments(argv): help='Number of folds to use for cross validation. Mainly used for testing.', default=10) parser.add_argument('--lfw_distance_metric', type=int, help='Type of distance metric to use. 0: Euclidean, 1:Cosine similarity distance.', default=0) - parser.add_argument('--lfw_use_flipped_images', + parser.add_argument('--lfw_use_flipped_images', help='Concatenates embeddings for the image and its horizontally flipped counterpart.', action='store_true') - parser.add_argument('--lfw_subtract_mean', + parser.add_argument('--lfw_subtract_mean', help='Subtract feature mean before calculating distance.', action='store_true') return parser.parse_args(argv) - + if __name__ == '__main__': main(parse_arguments(sys.argv[1:])) diff --git a/src/train_tripletloss.py b/src/train_tripletloss.py index d6df19a4d..6a775f1d8 100644 --- a/src/train_tripletloss.py +++ b/src/train_tripletloss.py @@ -2,19 +2,19 @@ FaceNet: A Unified Embedding for Face Recognition and Clustering: http://arxiv.org/abs/1503.03832 """ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software.
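With --lfw_use_flipped_images, the evaluate() hunk above enqueues every LFW image twice and flips every second copy (the (labels_array % 2) * facenet.FLIP control), then builds one descriptor per image from both passes; per the option's help text, the embeddings of an image and its mirrored counterpart are concatenated. The assembly step, roughly, in numpy (the exact bookkeeping in evaluate() differs):

    import numpy as np

    nrof_images, embedding_size = 6, 4
    emb_array = np.arange(nrof_images * 2 * embedding_size,
                          dtype=np.float32).reshape(-1, embedding_size)

    # even queue positions: original image; odd positions: horizontal flip
    embeddings = np.concatenate([emb_array[0::2], emb_array[1::2]], axis=1)
    print(embeddings.shape)   # (6, 8), i.e. 2 * embedding_size per image
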
-# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -31,7 +31,8 @@ import os.path import time import sys -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import importlib import itertools @@ -44,7 +45,7 @@ from six.moves import xrange # @UnresolvedImport def main(args): - + network = importlib.import_module(args.model_def) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') @@ -57,47 +58,47 @@ def main(args): # Write arguments to a text file facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) - + # Store some git revision info in a text file in the log directory src_path,_ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) train_set = facenet.get_dataset(args.data_dir) - + print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) if args.pretrained_model: print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model)) - + if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) - - + + with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Placeholder for the learning rate learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') - + batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') - + phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') - + image_paths_placeholder = tf.placeholder(tf.string, shape=(None,3), name='image_paths') labels_placeholder = tf.placeholder(tf.int64, shape=(None,3), name='labels') - + input_queue = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(3,), (3,)], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder]) - + nrof_preprocess_threads = 4 images_and_labels = [] for _ in range(nrof_preprocess_threads): @@ -106,21 +107,21 @@ def main(args): for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_image(file_contents, channels=3) - + if args.random_crop: image = tf.random_crop(image, [args.image_size, args.image_size, 3]) else: image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size) if args.random_flip: image = tf.image.random_flip_left_right(image) - + #pylint: disable=no-member image.set_shape((args.image_size, args.image_size, 3)) images.append(tf.image.per_image_standardization(image)) images_and_labels.append([images, label]) - + image_batch, labels_batch = tf.train.batch_join( - images_and_labels, batch_size=batch_size_placeholder, + images_and_labels, batch_size=batch_size_placeholder, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) @@ -129,15 +130,15 @@ def main(args): labels_batch = tf.identity(labels_batch, 'label_batch') # Build the inference graph - prelogits, _ = network.inference(image_batch, args.keep_probability, + prelogits, _ = 
network.inference(image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) - + embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # Split embeddings into anchor, positive and negative and calculate triplet loss anchor, positive, negative = tf.unstack(tf.reshape(embeddings, [-1,3,args.embedding_size]), 3, 1) triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha) - + learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step, args.learning_rate_decay_epochs*args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) @@ -147,9 +148,9 @@ def main(args): total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters - train_op = facenet.train(total_loss, global_step, args.optimizer, + train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables()) - + # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) @@ -158,7 +159,7 @@ def main(args): # Start running operations on the Graph. gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) - sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) + sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) # Initialize variables sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder:True}) @@ -181,7 +182,7 @@ def main(args): epoch = step // args.epoch_size # Train for one epoch train(args, sess, train_set, epoch, image_paths_placeholder, labels_placeholder, labels_batch, - batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step, + batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step, embeddings, total_loss, train_op, summary_op, summary_writer, args.learning_rate_schedule_file, args.embedding_size, anchor, positive, negative, triplet_loss) @@ -190,19 +191,19 @@ def main(args): # Evaluate on LFW if args.lfw_dir: - evaluate(sess, lfw_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, - batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, actual_issame, args.batch_size, + evaluate(sess, lfw_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, + batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, actual_issame, args.batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, args.embedding_size) return model_dir def train(args, sess, dataset, epoch, image_paths_placeholder, labels_placeholder, labels_batch, - batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step, + batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step, embeddings, loss, train_op, summary_op, summary_writer, learning_rate_schedule_file, embedding_size, anchor, positive, negative, triplet_loss): batch_number = 0 - + if args.learning_rate>0.0: lr = args.learning_rate else: @@ -210,7 +211,7 @@ def train(args, sess, dataset, epoch, image_paths_placeholder, labels_placeholde while batch_number < args.epoch_size: # Sample people randomly from the dataset 
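Above, the L2-normalized embeddings are reshaped to [-1, 3, embedding_size] and unstacked into anchor/positive/negative streams before facenet.triplet_loss applies the margin alpha. The loss from the FaceNet paper cited at the top of this file, as a numpy sketch:

    import numpy as np

    def l2_normalize(x, eps=1e-10):
        # cf. tf.nn.l2_normalize(prelogits, 1, 1e-10)
        return x / np.sqrt(np.maximum(np.sum(np.square(x), axis=1, keepdims=True), eps))

    def triplet_loss(anchor, positive, negative, alpha=0.2):
        # mean over triplets of max(0, ||a-p||^2 - ||a-n||^2 + alpha)
        pos_dist = np.sum(np.square(anchor - positive), axis=1)
        neg_dist = np.sum(np.square(anchor - negative), axis=1)
        return np.mean(np.maximum(pos_dist - neg_dist + alpha, 0.0))

    rng = np.random.default_rng(0)
    emb = l2_normalize(rng.normal(size=(15, 128)))   # five (a, p, n) triplets
    print(triplet_loss(emb[0::3], emb[1::3], emb[2::3]))
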
image_paths, num_per_class = sample_people(dataset, args.people_per_batch, args.images_per_person) - + print('Running forward pass on sampled images: ', end='') start_time = time.time() nrof_examples = args.people_per_batch * args.images_per_person @@ -221,17 +222,17 @@ def train(args, sess, dataset, epoch, image_paths_placeholder, labels_placeholde nrof_batches = int(np.ceil(nrof_examples / args.batch_size)) for i in range(nrof_batches): batch_size = min(nrof_examples-i*args.batch_size, args.batch_size) - emb, lab = sess.run([embeddings, labels_batch], feed_dict={batch_size_placeholder: batch_size, + emb, lab = sess.run([embeddings, labels_batch], feed_dict={batch_size_placeholder: batch_size, learning_rate_placeholder: lr, phase_train_placeholder: True}) emb_array[lab,:] = emb print('%.3f' % (time.time()-start_time)) # Select triplets based on the embeddings print('Selecting suitable triplets for training') - triplets, nrof_random_negs, nrof_triplets = select_triplets(emb_array, num_per_class, + triplets, nrof_random_negs, nrof_triplets = select_triplets(emb_array, num_per_class, image_paths, args.people_per_batch, args.alpha) selection_time = time.time() - start_time - print('(nrof_random_negs, nrof_triplets) = (%d, %d): time=%.3f seconds' % + print('(nrof_random_negs, nrof_triplets) = (%d, %d): time=%.3f seconds' % (nrof_random_negs, nrof_triplets, selection_time)) # Perform training on the selected triplets @@ -261,13 +262,13 @@ def train(args, sess, dataset, epoch, image_paths_placeholder, labels_placeholde i += 1 train_time += duration summary.value.add(tag='loss', simple_value=err) - + # Add validation loss and accuracy to summary #pylint: disable=maybe-no-member summary.value.add(tag='time/selection', simple_value=selection_time) summary_writer.add_summary(summary, step) return step - + def select_triplets(embeddings, nrof_images_per_class, image_paths, people_per_batch, alpha): """ Select the triplets for training """ @@ -275,7 +276,7 @@ def select_triplets(embeddings, nrof_images_per_class, image_paths, people_per_b emb_start_idx = 0 num_trips = 0 triplets = [] - + # VGG Face: Choosing good triplets is crucial and should strike a balance between # selecting informative (i.e. challenging) examples and swamping training with examples that # are too hard. 
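The VGG Face comment above describes the sampling strategy used by the select_triplets code that follows: for each (anchor, positive) pair, pick a random negative among those whose squared distance to the anchor violates the margin, rather than always the hardest one. A rough numpy rendering of that selection for one pair (the indices and masking are illustrative):

    import numpy as np

    rng = np.random.default_rng(0)
    emb = rng.normal(size=(10, 128))   # embeddings for one sampled batch
    a_idx, p_idx, alpha = 0, 1, 0.2    # an (anchor, positive) pair of one class

    pos_dist_sqr = np.sum((emb[a_idx] - emb[p_idx])**2)
    neg_dists_sqr = np.sum((emb[a_idx] - emb)**2, axis=1)
    neg_dists_sqr[:2] = np.inf         # exclude the anchor's own class

    all_neg = np.where(neg_dists_sqr - pos_dist_sqr < alpha)[0]
    if all_neg.size > 0:
        n_idx = all_neg[rng.integers(all_neg.size)]   # random margin-violating negative
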
This is achieved by extending each pair (a, p) to a triplet (a, p, n) by sampling @@ -299,7 +300,7 @@ def select_triplets(embeddings, nrof_images_per_class, image_paths, people_per_b rnd_idx = np.random.randint(nrof_random_negs) n_idx = all_neg[rnd_idx] triplets.append((image_paths[a_idx], image_paths[p_idx], image_paths[n_idx])) - #print('Triplet %d: (%d, %d, %d), pos_dist=%2.6f, neg_dist=%2.6f (%d, %d, %d, %d, %d)' % + #print('Triplet %d: (%d, %d, %d), pos_dist=%2.6f, neg_dist=%2.6f (%d, %d, %d, %d, %d)' % # (trip_idx, a_idx, p_idx, n_idx, pos_dist_sqr, neg_dists_sqr[n_idx], nrof_random_negs, rnd_idx, i, j, emb_start_idx)) trip_idx += 1 @@ -312,12 +313,12 @@ def sample_people(dataset, people_per_batch, images_per_person): nrof_images = people_per_batch * images_per_person - + # Sample classes from the dataset nrof_classes = len(dataset) class_indices = np.arange(nrof_classes) np.random.shuffle(class_indices) - + i = 0 image_paths = [] num_per_class = [] @@ -335,16 +336,16 @@ def sample_people(dataset, people_per_batch, images_per_person): image_paths += image_paths_for_class num_per_class.append(nrof_images_from_class) i+=1 - + return image_paths, num_per_class -def evaluate(sess, image_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, - batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, actual_issame, batch_size, +def evaluate(sess, image_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, + batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, actual_issame, batch_size, nrof_folds, log_dir, step, summary_writer, embedding_size): start_time = time.time() # Run forward pass to calculate embeddings print('Running forward pass on LFW images: ', end='') - + nrof_images = len(actual_issame)*2 assert(len(image_paths)==nrof_images) labels_array = np.reshape(np.arange(nrof_images),(-1,3)) @@ -360,11 +361,11 @@ def evaluate(sess, image_paths, embeddings, labels_batch, image_paths_placeholde emb_array[lab,:] = emb label_check_array[lab] = 1 print('%.3f' % (time.time()-start_time)) - + assert(np.all(label_check_array==1)) - + _, _, accuracy, val, val_std, far = lfw.evaluate(emb_array, actual_issame, nrof_folds=nrof_folds) - + print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy), np.std(accuracy))) print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far)) lfw_time = time.time() - start_time @@ -387,7 +388,7 @@ def save_variables_and_metagraph(sess, saver, summary_writer, model_dir, model_n save_time_variables = time.time() - start_time print('Variables saved in %.2f seconds' % save_time_variables) metagraph_filename = os.path.join(model_dir, 'model-%s.meta' % model_name) - save_time_metagraph = 0 + save_time_metagraph = 0 if not os.path.exists(metagraph_filename): print('Saving metagraph') start_time = time.time() @@ -399,8 +400,8 @@ def save_variables_and_metagraph(sess, saver, summary_writer, model_dir, model_n summary.value.add(tag='time/save_variables', simple_value=save_time_variables) summary.value.add(tag='time/save_metagraph', simple_value=save_time_metagraph) summary_writer.add_summary(summary, step) - - + + def get_learning_rate_from_file(filename, epoch): with open(filename, 'r') as f: for line in f.readlines(): @@ -413,12 +414,12 @@ def get_learning_rate_from_file(filename, epoch): learning_rate = lr else: return learning_rate - + def parse_arguments(argv): parser =
argparse.ArgumentParser() - - parser.add_argument('--logs_base_dir', type=str, + + parser.add_argument('--logs_base_dir', type=str, help='Directory where to write event logs.', default='~/logs/facenet') parser.add_argument('--models_base_dir', type=str, help='Directory where to write trained models and checkpoints.', default='~/models/facenet') @@ -447,10 +448,10 @@ def parse_arguments(argv): help='Positive to negative triplet distance margin.', default=0.2) parser.add_argument('--embedding_size', type=int, help='Dimensionality of the embedding.', default=128) - parser.add_argument('--random_crop', + parser.add_argument('--random_crop', help='Performs random cropping of training images. If false, the center image_size pixels from the training images are used. ' + 'If the size of the images in the data directory is equal to image_size no cropping is performed', action='store_true') - parser.add_argument('--random_flip', + parser.add_argument('--random_flip', help='Performs random horizontal flipping of training images.', action='store_true') parser.add_argument('--keep_probability', type=float, help='Keep probability of dropout for the fully connected layer(s).', default=1.0) @@ -480,7 +481,7 @@ def parse_arguments(argv): parser.add_argument('--lfw_nrof_folds', type=int, help='Number of folds to use for cross validation. Mainly used for testing.', default=10) return parser.parse_args(argv) - + if __name__ == '__main__': main(parse_arguments(sys.argv[1:])) diff --git a/src/validate_on_lfw.py b/src/validate_on_lfw.py index ac456c5f6..574bd5952 100644 --- a/src/validate_on_lfw.py +++ b/src/validate_on_lfw.py @@ -4,19 +4,19 @@ in the same directory, and the metagraph should have the extension '.meta'. """ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -29,7 +29,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import argparse import facenet @@ -42,23 +43,23 @@ from scipy import interpolate def main(args): - + with tf.Graph().as_default(): - + with tf.Session() as sess: - + # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) - + image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths') labels_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='labels') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') control_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='control') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') - + nrof_preprocess_threads = 4 image_size = (args.image_size, args.image_size) eval_input_queue = data_flow_ops.FIFOQueue(capacity=2000000, @@ -67,14 +68,14 @@ def main(args): shared_name=None, name=None) eval_enqueue_op = eval_input_queue.enqueue_many([image_paths_placeholder, labels_placeholder, control_placeholder], name='eval_enqueue_op') image_batch, label_batch = facenet.create_input_pipeline(eval_input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder) - + # Load the model input_map = {'image_batch': image_batch, 'label_batch': label_batch, 'phase_train': phase_train_placeholder} facenet.load_model(args.model, input_map=input_map) # Get output tensor embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") -# +# coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) @@ -82,12 +83,12 @@ def main(args): embeddings, label_batch, paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, args.distance_metric, args.subtract_mean, args.use_flipped_images, args.use_fixed_image_standardization) - + def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, distance_metric, subtract_mean, use_flipped_images, use_fixed_image_standardization): # Run forward pass to calculate embeddings print('Running forward pass on LFW images') - + # Enqueue one epoch of image paths and labels nrof_embeddings = len(actual_issame)*2 # nrof_pairs * nrof_images_per_pair nrof_flips = 2 if use_flipped_images else 1 @@ -101,7 +102,7 @@ def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phas # Flip every second image control_array += (labels_array % 2)*facenet.FLIP sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, control_placeholder: control_array}) - + embedding_size = int(embeddings.get_shape()[1]) assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size' nrof_batches = nrof_images // batch_size @@ -126,23 +127,23 @@ def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phas assert np.array_equal(lab_array, np.arange(nrof_images))==True, 'Wrong labels used for evaluation, possibly caused by training examples left in the input pipeline' tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(embeddings, actual_issame, nrof_folds=nrof_folds,
distance_metric=distance_metric, subtract_mean=subtract_mean) - + print('Accuracy: %2.5f+-%2.5f' % (np.mean(accuracy), np.std(accuracy))) print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far)) - + auc = metrics.auc(fpr, tpr) print('Area Under Curve (AUC): %1.3f' % auc) eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.) print('Equal Error Rate (EER): %1.3f' % eer) - + def parse_arguments(argv): parser = argparse.ArgumentParser() - + parser.add_argument('lfw_dir', type=str, help='Path to the data directory containing aligned LFW face patches.') parser.add_argument('--lfw_batch_size', type=int, help='Number of images to process in a batch in the LFW test set.', default=100) - parser.add_argument('model', type=str, + parser.add_argument('model', type=str, help='Could be either a directory containing the meta_file and ckpt_file or a model protobuf (.pb) file') parser.add_argument('--image_size', type=int, help='Image size (height, width) in pixels.', default=160) @@ -152,11 +153,11 @@ def parse_arguments(argv): help='Number of folds to use for cross validation. Mainly used for testing.', default=10) parser.add_argument('--distance_metric', type=int, help='Distance metric 0:euclidean, 1:cosine similarity.', default=0) - parser.add_argument('--use_flipped_images', + parser.add_argument('--use_flipped_images', help='Concatenates embeddings for the image and its horizontally flipped counterpart.', action='store_true') - parser.add_argument('--subtract_mean', + parser.add_argument('--subtract_mean', help='Subtract feature mean before calculating distance.', action='store_true') - parser.add_argument('--use_fixed_image_standardization', + parser.add_argument('--use_fixed_image_standardization', help='Performs fixed standardization of images.', action='store_true') return parser.parse_args(argv) diff --git a/test/batch_norm_test.py b/test/batch_norm_test.py index 48cfd555f..34050b24c 100644 --- a/test/batch_norm_test.py +++ b/test/batch_norm_test.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -21,7 +21,8 @@ # SOFTWARE.
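# --- Aside: the EER computed in validate_on_lfw above is the point where the
# false positive rate equals the false negative rate, found as the root of
# f(x) = 1 - x - TPR(FPR=x). Self-contained check with made-up ROC samples:
import numpy as np
from scipy.optimize import brentq
from scipy import interpolate

fpr = np.array([0.0, 0.1, 0.3, 1.0])  # illustrative ROC points, not real data
tpr = np.array([0.0, 0.6, 0.9, 1.0])
eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.)
print('EER: %1.3f' % eer)
# ---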
import unittest -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import models import numpy as np import numpy.testing as testing @@ -31,36 +32,35 @@ class BatchNormTest(unittest.TestCase): @unittest.skip("Skip batch norm test case") def testBatchNorm(self): - + tf.set_random_seed(123) - + x = tf.placeholder(tf.float32, [None, 20, 20, 10], name='input') phase_train = tf.placeholder(tf.bool, name='phase_train') - + # generate random noise to pass into batch norm #x_gen = tf.random_normal([50,20,20,10]) - + bn = models.network.batch_norm(x, phase_train) - + init = tf.global_variables_initializer() sess = tf.Session(config=tf.ConfigProto()) sess.run(init) - + with sess.as_default(): - + #generate a constant variable to pass into batch norm y = np.random.normal(0, 1, size=(50,20,20,10)) - + feed_dict = {x: y, phase_train: True} sess.run(bn, feed_dict=feed_dict) - + feed_dict = {x: y, phase_train: False} y1 = sess.run(bn, feed_dict=feed_dict) y2 = sess.run(bn, feed_dict=feed_dict) - + testing.assert_almost_equal(y1, y2, 10, 'Output from two forward passes with phase_train==false should be equal') if __name__ == "__main__": unittest.main() - \ No newline at end of file diff --git a/test/center_loss_test.py b/test/center_loss_test.py index 196cd1143..3806910f5 100644 --- a/test/center_loss_test.py +++ b/test/center_loss_test.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -21,12 +21,13 @@ # SOFTWARE. 
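# --- Aside on the import change repeated throughout this patch:
# tensorflow.compat.v1 restores the 1.x API surface under TF2, and
# disable_v2_behavior() turns off eager execution so graph-mode idioms
# (placeholders, Session.run) keep working. Minimal sketch, assuming TF2 is
# installed:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

x = tf.placeholder(tf.float32, shape=(None,), name='x')  # TF1-style placeholder
y = x * 2.0
with tf.Session() as sess:
    print(sess.run(y, feed_dict={x: [1.0, 2.0]}))  # [2. 4.]
# ---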
import unittest -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import facenet class CenterLossTest(unittest.TestCase): - + def testCenterLoss(self): @@ -34,42 +35,42 @@ def testCenterLoss(self): nrof_features = 2 nrof_classes = 16 alfa = 0.5 - + with tf.Graph().as_default(): - + features = tf.placeholder(tf.float32, shape=(batch_size, nrof_features), name='features') labels = tf.placeholder(tf.int32, shape=(batch_size,), name='labels') # Define center loss center_loss, centers = facenet.center_loss(features, labels, alfa, nrof_classes) - - label_to_center = np.array( [ + + label_to_center = np.array( [ [-3,-3], [-3,-1], [-3,1], [-3,3], [-1,-3], [-1,-1], [-1,1], [-1,3], [ 1,-3], [ 1,-1], [ 1,1], [ 1,3], - [ 3,-3], [ 3,-1], [ 3,1], [ 3,3] + [ 3,-3], [ 3,-1], [ 3,1], [ 3,3] ]) - + sess = tf.Session() with sess.as_default(): sess.run(tf.global_variables_initializer()) np.random.seed(seed=666) - + for _ in range(0,100): # Create array of random labels lbls = np.random.randint(low=0, high=nrof_classes, size=(batch_size,)) feats = create_features(label_to_center, batch_size, nrof_features, lbls) center_loss_, centers_ = sess.run([center_loss, centers], feed_dict={features:feats, labels:lbls}) - + # After a large number of updates the estimated centers should be close to the true ones np.testing.assert_almost_equal(centers_, label_to_center, decimal=5, err_msg='Incorrect estimated centers') np.testing.assert_almost_equal(center_loss_, 0.0, decimal=5, err_msg='Incorrect center loss') - + def create_features(label_to_center, batch_size, nrof_features, labels): # Map label to center -# label_to_center_dict = { +# label_to_center_dict = { # 0:(-3,-3), 1:(-3,-1), 2:(-3,1), 3:(-3,3), # 4:(-1,-3), 5:(-1,-1), 6:(-1,1), 7:(-1,3), # 8:( 1,-3), 9:( 1,-1), 10:( 1,1), 11:( 1,3), @@ -82,6 +83,6 @@ def create_features(label_to_center, batch_size, nrof_features, labels): for j in range(nrof_features): feats[i,j] = cntr[j] return feats - + if __name__ == "__main__": unittest.main() diff --git a/test/restore_test.py b/test/restore_test.py index befb04dc6..b205379f1 100644 --- a/test/restore_test.py +++ b/test/restore_test.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -24,83 +24,84 @@ import tempfile import os import shutil -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np class TrainTest(unittest.TestCase): - + @classmethod def setUpClass(self): self.tmp_dir = tempfile.mkdtemp() - + @classmethod def tearDownClass(self): # Recursively remove the temporary directory shutil.rmtree(self.tmp_dir) def test_restore_noema(self): - + # Create 100 phony x, y data points in NumPy, y = x * 0.1 + 0.3 x_data = np.random.rand(100).astype(np.float32) y_data = x_data * 0.1 + 0.3 - + # Try to find values for W and b that compute y_data = W * x_data + b # (We know that W should be 0.1 and b 0.3, but TensorFlow will # figure that out for us.) W = tf.Variable(tf.random_uniform([1], -1.0, 1.0), name='W') b = tf.Variable(tf.zeros([1]), name='b') y = W * x_data + b - + # Minimize the mean squared errors. loss = tf.reduce_mean(tf.square(y - y_data)) optimizer = tf.train.GradientDescentOptimizer(0.5) train = optimizer.minimize(loss) - + # Before starting, initialize the variables. We will 'run' this first. init = tf.global_variables_initializer() saver = tf.train.Saver(tf.trainable_variables()) - + # Launch the graph. sess = tf.Session() sess.run(init) - + # Fit the line. for _ in range(201): sess.run(train) - + w_reference = sess.run('W:0') b_reference = sess.run('b:0') - + saver.save(sess, os.path.join(self.tmp_dir, "model_ex1")) - + tf.reset_default_graph() saver = tf.train.import_meta_graph(os.path.join(self.tmp_dir, "model_ex1.meta")) sess = tf.Session() saver.restore(sess, os.path.join(self.tmp_dir, "model_ex1")) - + w_restored = sess.run('W:0') b_restored = sess.run('b:0') - + self.assertAlmostEqual(w_reference, w_restored, 'Restored model uses different weights than the original model') self.assertAlmostEqual(b_reference, b_restored, 'Restored model uses different weights than the original model') @unittest.skip("Skip restore EMA test case for now") def test_restore_ema(self): - + # Create 100 phony x, y data points in NumPy, y = x * 0.1 + 0.3 x_data = np.random.rand(100).astype(np.float32) y_data = x_data * 0.1 + 0.3 - + # Try to find values for W and b that compute y_data = W * x_data + b # (We know that W should be 0.1 and b 0.3, but TensorFlow will # figure that out for us.) W = tf.Variable(tf.random_uniform([1], -1.0, 1.0), name='W') b = tf.Variable(tf.zeros([1]), name='b') y = W * x_data + b - + # Minimize the mean squared errors. loss = tf.reduce_mean(tf.square(y - y_data)) optimizer = tf.train.GradientDescentOptimizer(0.5) @@ -111,30 +112,30 @@ def test_restore_ema(self): averages_op = ema.apply(tf.trainable_variables()) with tf.control_dependencies([opt_op]): train_op = tf.group(averages_op) - + # Before starting, initialize the variables. We will 'run' this first. init = tf.global_variables_initializer() saver = tf.train.Saver(tf.trainable_variables()) - + # Launch the graph. sess = tf.Session() sess.run(init) - + # Fit the line.
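# --- Aside: ema.apply() above creates a shadow copy of each trainable
# variable; grouping it with the optimizer step makes every training step
# also update shadow = decay * shadow + (1 - decay) * value. Tiny numeric
# sketch of that update rule (decay and values illustrative):
decay, shadow, value = 0.9, 0.0, 0.1
shadow = decay * shadow + (1 - decay) * value  # -> 0.01, slowly tracking value
# ---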
for _ in range(201): sess.run(train_op) - + w_reference = sess.run('W/ExponentialMovingAverage:0') b_reference = sess.run('b/ExponentialMovingAverage:0') - + saver.save(sess, os.path.join(self.tmp_dir, "model_ex1")) - + tf.reset_default_graph() tf.train.import_meta_graph(os.path.join(self.tmp_dir, "model_ex1.meta")) sess = tf.Session() - + print('------------------------------------------------------') for var in tf.global_variables(): print('all variables: ' + var.op.name) @@ -156,18 +157,18 @@ def test_restore_ema(self): ema_name = var.op.name + '/ExponentialMovingAverage' print('%s: %s' % (ema_name, var.op.name)) restore_vars[ema_name] = var - + saver = tf.train.Saver(restore_vars, name='ema_restore') - + saver.restore(sess, os.path.join(self.tmp_dir, "model_ex1")) - + w_restored = sess.run('W:0') b_restored = sess.run('b:0') - + self.assertAlmostEqual(w_reference, w_restored, 'Restored model does not use the EMA filtered weight') self.assertAlmostEqual(b_reference, b_restored, 'Restored model does not use the EMA filtered bias') - + # Create a checkpoint file pointing to the model def create_checkpoint_file(model_dir, model_file): checkpoint_filename = os.path.join(model_dir, 'checkpoint') @@ -175,7 +176,6 @@ def create_checkpoint_file(model_dir, model_file): with open(checkpoint_filename, 'w') as f: f.write('model_checkpoint_path: "%s"\n' % full_model_filename) f.write('all_model_checkpoint_paths: "%s"\n' % full_model_filename) - + if __name__ == "__main__": unittest.main() - \ No newline at end of file diff --git a/test/triplet_loss_test.py b/test/triplet_loss_test.py index 2648b3061..85d60ce71 100644 --- a/test/triplet_loss_test.py +++ b/test/triplet_loss_test.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -21,23 +21,24 @@ # SOFTWARE.
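# --- Aside: the EMA-restore pattern exercised in restore_test above, in
# isolation: map each checkpoint entry '<var>/ExponentialMovingAverage' onto
# the live variable, so restoring loads the smoothed weights instead of the
# raw ones. A sketch (helper name illustrative):
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def make_ema_restore_saver():
    restore_vars = {var.op.name + '/ExponentialMovingAverage': var
                    for var in tf.trainable_variables()}
    return tf.train.Saver(restore_vars, name='ema_restore')
# ---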
import unittest -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import facenet class DemuxEmbeddingsTest(unittest.TestCase): - + def testDemuxEmbeddings(self): batch_size = 3*12 embedding_size = 16 alpha = 0.2 - + with tf.Graph().as_default(): - + embeddings = tf.placeholder(tf.float64, shape=(batch_size, embedding_size), name='embeddings') anchor, positive, negative = tf.unstack(tf.reshape(embeddings, [-1,3,embedding_size]), 3, 1) triplet_loss = facenet.triplet_loss(anchor, positive, negative, alpha) - + sess = tf.Session() with sess.as_default(): np.random.seed(seed=666) @@ -47,8 +48,8 @@ def testDemuxEmbeddings(self): pos_dist_sqr = np.sum(np.square(emb[0::3,:]-emb[1::3,:]),1) neg_dist_sqr = np.sum(np.square(emb[0::3,:]-emb[2::3,:]),1) np_triplet_loss = np.mean(np.maximum(0.0, pos_dist_sqr - neg_dist_sqr + alpha)) - + np.testing.assert_almost_equal(tf_triplet_loss, np_triplet_loss, decimal=5, err_msg='Triplet loss is incorrect') - + if __name__ == "__main__": unittest.main() diff --git a/tmp/deepdream.py b/tmp/deepdream.py index 604636bc2..916f56c56 100644 --- a/tmp/deepdream.py +++ b/tmp/deepdream.py @@ -3,7 +3,8 @@ from functools import partial import PIL.Image -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import matplotlib.pyplot as plt import urllib2 import os @@ -24,12 +25,12 @@ def main(): # Extract with zipfile.ZipFile(local_zip_file, 'r') as zip_ref: zip_ref.extractall(data_dir) - + # start with a gray image with a little noise img_noise = np.random.uniform(size=(224,224,3)) + 100.0 - + model_fn = 'tensorflow_inception_graph.pb' - + # creating TensorFlow session and loading the model graph = tf.Graph() sess = tf.InteractiveSession(graph=graph) @@ -40,14 +41,14 @@ def main(): imagenet_mean = 117.0 t_preprocessed = tf.expand_dims(t_input-imagenet_mean, 0) tf.import_graph_def(graph_def, {'input':t_preprocessed}) - + layers = [op.name for op in graph.get_operations() if op.type=='Conv2D' and 'import/' in op.name] feature_nums = [int(graph.get_tensor_by_name(name+':0').get_shape()[-1]) for name in layers] - + print('Number of layers', len(layers)) print('Total number of feature channels:', sum(feature_nums)) - - + + # Helper functions for TF Graph visualization #pylint: disable=unused-variable def strip_consts(graph_def, max_const_size=32): @@ -62,7 +63,7 @@ def strip_consts(graph_def, max_const_size=32): if size > max_const_size: tensor.tensor_content = "<stripped %d bytes>"%size return strip_def - + def rename_nodes(graph_def, rename_func): res_def = tf.GraphDef() for n0 in graph_def.node: @@ -72,32 +73,32 @@ def rename_nodes(graph_def, rename_func): for i, s in enumerate(n.input): n.input[i] = rename_func(s) if s[0]!='^' else '^'+rename_func(s[1:]) return res_def - + def showarray(a): a = np.uint8(np.clip(a, 0, 1)*255) plt.imshow(a) plt.show() - + def visstd(a, s=0.1): '''Normalize the image range for visualization''' return (a-a.mean())/max(a.std(), 1e-4)*s + 0.5 - + def T(layer): '''Helper for getting layer output tensor''' return graph.get_tensor_by_name("import/%s:0"%layer) - + def render_naive(t_obj, img0=img_noise, iter_n=20, step=1.0): t_score = tf.reduce_mean(t_obj) # defining the optimization objective t_grad = tf.gradients(t_score, t_input)[0] # behold the power of automatic differentiation!
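# --- Aside: the loop below is plain gradient ascent on the mean activation;
# dividing g by its standard deviation lets one step size work across layers
# and networks. Equivalent update, in NumPy terms:
#   g = g / (g.std() + 1e-8)
#   img = img + g * step
# ---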
- + img = img0.copy() for _ in range(iter_n): g, _ = sess.run([t_grad, t_score], {t_input:img}) - # normalizing the gradient, so the same step size should work + # normalizing the gradient, so the same step size should work g /= g.std()+1e-8 # for different layers and networks img += g*step showarray(visstd(img)) - + def tffunc(*argtypes): '''Helper that transforms TF-graph generating function into a regular one. See "resize" function below. @@ -109,17 +110,17 @@ def wrapper(*args, **kw): return out.eval(dict(zip(placeholders, args)), session=kw.get('session')) return wrapper return wrap - + # Helper function that uses TF to resize an image def resize(img, size): img = tf.expand_dims(img, 0) return tf.image.resize_bilinear(img, size)[0,:,:,:] resize = tffunc(np.float32, np.int32)(resize) - - + + def calc_grad_tiled(img, t_grad, tile_size=512): '''Compute the value of tensor t_grad over the image in a tiled way. - Random shifts are applied to the image to blur tile boundaries over + Random shifts are applied to the image to blur tile boundaries over multiple iterations.''' sz = tile_size h, w = img.shape[:2] @@ -131,12 +132,12 @@ def calc_grad_tiled(img, t_grad, tile_size=512): sub = img_shift[y:y+sz,x:x+sz] g = sess.run(t_grad, {t_input:sub}) grad[y:y+sz,x:x+sz] = g - return np.roll(np.roll(grad, -sx, 1), -sy, 0) - + return np.roll(np.roll(grad, -sx, 1), -sy, 0) + def render_multiscale(t_obj, img0=img_noise, iter_n=10, step=1.0, octave_n=3, octave_scale=1.4): t_score = tf.reduce_mean(t_obj) # defining the optimization objective t_grad = tf.gradients(t_score, t_input)[0] # behold the power of automatic differentiation! - + img = img0.copy() for octave in range(octave_n): if octave>0: @@ -144,11 +145,11 @@ def render_multiscale(t_obj, img0=img_noise, iter_n=10, step=1.0, octave_n=3, oc img = resize(img, np.int32(hw)) for _ in range(iter_n): g = calc_grad_tiled(img, t_grad) - # normalizing the gradient, so the same step size should work + # normalizing the gradient, so the same step size should work g /= g.std()+1e-8 # for different layers and networks img += g*step showarray(visstd(img)) - + def lap_split(img): '''Split the image into lo and hi frequency components''' with tf.name_scope('split'): @@ -156,7 +157,7 @@ def lap_split(img): lo2 = tf.nn.conv2d_transpose(lo, k5x5*4, tf.shape(img), [1,2,2,1]) hi = img-lo2 return lo, hi - + def lap_split_n(img, n): '''Build Laplacian pyramid with n splits''' levels = [] @@ -165,7 +166,7 @@ def lap_split_n(img, n): levels.append(hi) levels.append(img) return levels[::-1] - + def lap_merge(levels): '''Merge Laplacian pyramid''' img = levels[0] @@ -173,13 +174,13 @@ def lap_merge(levels): with tf.name_scope('merge'): img = tf.nn.conv2d_transpose(img, k5x5*4, tf.shape(hi), [1,2,2,1]) + hi return img - + def normalize_std(img, eps=1e-10): '''Normalize image by making its standard deviation = 1.0''' with tf.name_scope('normalize'): std = tf.sqrt(tf.reduce_mean(tf.square(img))) return img/tf.maximum(std, eps) - + def lap_normalize(img, scale_n=4): '''Perform the Laplacian pyramid normalization.''' img = tf.expand_dims(img,0) @@ -187,14 +188,14 @@ def lap_normalize(img, scale_n=4): tlevels = list(map(normalize_std, tlevels)) out = lap_merge(tlevels) return out[0,:,:,:] - + def render_lapnorm(t_obj, img0=img_noise, visfunc=visstd, iter_n=10, step=1.0, octave_n=3, octave_scale=1.4, lap_n=4): t_score = tf.reduce_mean(t_obj) # defining the optimization objective t_grad = tf.gradients(t_score, t_input)[0] # behold the power of automatic differentiation! 
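# --- Aside on calc_grad_tiled above: the image is randomly rolled before
# each pass so tile seams land somewhere new every iteration, then the
# gradient is rolled back:
#   sx, sy = np.random.randint(sz, size=2)
#   img_shift = np.roll(np.roll(img, sx, 1), sy, 0)  # compute grads per tile
#   grad = np.roll(np.roll(grad, -sx, 1), -sy, 0)    # undo the shift
# ---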
# build the laplacian normalization graph lap_norm_func = tffunc(np.float32)(partial(lap_normalize, scale_n=lap_n)) - + img = img0.copy() for octave in range(octave_n): if octave>0: @@ -205,12 +206,12 @@ def render_lapnorm(t_obj, img0=img_noise, visfunc=visstd, g = lap_norm_func(g) img += g*step showarray(visfunc(img)) - + def render_deepdream(t_obj, img0=img_noise, iter_n=10, step=1.5, octave_n=4, octave_scale=1.4): t_score = tf.reduce_mean(t_obj) # defining the optimization objective t_grad = tf.gradients(t_score, t_input)[0] # behold the power of automatic differentiation! - + # split the image into a number of octaves img = img0 octaves = [] @@ -220,7 +221,7 @@ def render_deepdream(t_obj, img0=img_noise, hi = img-resize(lo, hw) img = lo octaves.append(hi) - + # generate details octave by octave for octave in range(octave_n): if octave>0: @@ -230,36 +231,36 @@ def render_deepdream(t_obj, img0=img_noise, g = calc_grad_tiled(img, t_grad) img += g*(step / (np.abs(g).mean()+1e-7)) showarray(img/255.0) - + # Picking some internal layer. Note that we use outputs before applying the ReLU nonlinearity # to have non-zero gradients for features with negative initial activations. layer = 'mixed4d_3x3_bottleneck_pre_relu' channel = 139 # picking some feature channel to visualize render_naive(T(layer)[:,:,:,channel]) - + render_multiscale(T(layer)[:,:,:,channel]) - + k = np.float32([1,4,6,4,1]) k = np.outer(k, k) k5x5 = k[:,:,None,None]/k.sum()*np.eye(3, dtype=np.float32) - + render_lapnorm(T(layer)[:,:,:,channel]) - + render_lapnorm(T(layer)[:,:,:,65]) - + render_lapnorm(T('mixed3b_1x1_pre_relu')[:,:,:,101]) - + render_lapnorm(T(layer)[:,:,:,65]+T(layer)[:,:,:,139], octave_n=4) - - + + img0 = PIL.Image.open('pilatus800.jpg') img0 = np.float32(img0) showarray(img0/255.0) - + render_deepdream(tf.square(T('mixed4c')), img0) - + render_deepdream(T(layer)[:,:,:,139], img0) - - + + if __name__ == '__main__': main() diff --git a/tmp/mnist_center_loss.py b/tmp/mnist_center_loss.py index 1122f7af2..5b5390542 100644 --- a/tmp/mnist_center_loss.py +++ b/tmp/mnist_center_loss.py @@ -28,7 +28,8 @@ import time from six.moves import urllib # @UnresolvedImport -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import matplotlib.pyplot as plt from tensorflow.python.ops import control_flow_ops @@ -134,13 +135,13 @@ def main(argv=None): # pylint: disable=unused-argument train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz') test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz') test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz') - + # Extract it into numpy arrays. train_data = extract_data(train_data_filename, 60000) train_labels = extract_labels(train_labels_filename, 60000) test_data = extract_data(test_data_filename, 10000) test_labels = extract_labels(test_labels_filename, 10000) - + # Generate a validation set. validation_data = train_data[:VALIDATION_SIZE, ...] validation_labels = train_labels[:VALIDATION_SIZE] @@ -190,7 +191,7 @@ def main(argv=None): # pylint: disable=unused-argument dtype=data_type())) fc2_biases = tf.Variable(tf.constant( 0.1, shape=[NUM_LABELS], dtype=data_type())) - + def batch_norm(x, phase_train): #pylint: disable=unused-variable """ Batch normalization on convolutional maps. 
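# --- Aside on render_deepdream above: the input is decomposed into octaves
# by repeated downscaling, keeping each high-frequency residual; detail is
# then generated coarse-to-fine and the residuals are added back, roughly:
#   lo = resize(img, shape/octave_scale); hi = img - resize(lo, shape)
#   ...ascend on the small image first, then img = resize(img, shape) + hi
# ---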
@@ -212,7 +213,7 @@ def batch_norm(x, phase_train): #pylint: disable=unused-variable name=name+'/beta', trainable=True, dtype=x.dtype) gamma = tf.Variable(tf.constant(1.0, shape=[n_out], dtype=x.dtype), name=name+'/gamma', trainable=True, dtype=x.dtype) - + batch_mean, batch_var = tf.nn.moments(x, [0], name='moments') ema = tf.train.ExponentialMovingAverage(decay=0.9) def mean_var_with_update(): @@ -224,7 +225,7 @@ def mean_var_with_update(): lambda: (ema.average(batch_mean), ema.average(batch_var))) normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3) return normed - + # We will replicate the model structure for the training subgraph, as well # as the evaluation subgraphs, while sharing the trainable parameters. @@ -281,13 +282,13 @@ def model(data, train=False): #center_loss, update_centers = center_loss_op(hidden, train_labels_node) center_loss, _ = facenet.center_loss(hidden, train_labels_node, 0.95, NUM_LABELS) loss = xent_loss + beta * center_loss - + # L2 regularization for the fully connected parameters. regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) + tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases)) # Add the regularization term to the loss. loss += 5e-4 * regularizers - + # Optimizer: set up a variable that's incremented once per batch and # controls the learning rate decay. batch = tf.Variable(0, dtype=data_type()) @@ -302,14 +303,14 @@ def model(data, train=False): optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(loss, global_step=batch) - + # Predictions for the current training minibatch. train_prediction = tf.nn.softmax(logits) - + # Predictions for the test and validation, which we'll compute less often. eval_logits, eval_embeddings = model(eval_data) eval_prediction = tf.nn.softmax(eval_logits) - + # Small utility function to evaluate a dataset by feeding batches of data to # {eval_data} and pulling the results from {eval_predictions}. # Saves memory and enables this to run on smaller GPUs. @@ -331,7 +332,7 @@ def eval_in_batches(data, sess): feed_dict={eval_data: data[-EVAL_BATCH_SIZE:, ...]}) predictions[begin:, :] = batch_predictions[begin - size:, :] return predictions - + def calculate_embeddings(data, sess): """Get all predictions for a dataset by running it in small batches.""" size = data.shape[0] @@ -389,9 +390,9 @@ def calculate_embeddings(data, sess): print('test_error', test_error) assert test_error == 0.0, 'expected 0.0 test_error, got %.2f' % ( test_error,) - + train_embeddings = calculate_embeddings(train_data, sess) - + color_list = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'b', 'g', 'r', 'c' ] plt.figure(1) for n in range(0,10): diff --git a/tmp/mnist_noise_labels.py b/tmp/mnist_noise_labels.py index d24e9a342..2d5978915 100644 --- a/tmp/mnist_noise_labels.py +++ b/tmp/mnist_noise_labels.py @@ -28,7 +28,8 @@ import time from six.moves import urllib # @UnresolvedImport -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np from six.moves import xrange @@ -133,13 +134,13 @@ def main(argv=None): # pylint: disable=unused-argument train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz') test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz') test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz') - + # Extract it into numpy arrays. 
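# --- Aside on mnist_center_loss above: the objective combines softmax
# cross-entropy with a center loss that pulls features toward per-class
# centers, plus L2 weight decay:
#   total = xent + beta * center + 5e-4 * l2
# facenet.center_loss nudges each matched center toward the feature roughly as
#   centers[y] -= (1 - alfa) * (centers[y] - feature)
# ---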
train_data = extract_data(train_data_filename, 60000) train_labels = extract_labels(train_labels_filename, 60000) test_data = extract_data(test_data_filename, 10000) test_labels = extract_labels(test_labels_filename, 10000) - + # Generate a validation set. validation_data = train_data[:VALIDATION_SIZE, ...] validation_labels = train_labels[:VALIDATION_SIZE] @@ -236,7 +237,7 @@ def model(data, train=False): # Training computation: logits + cross-entropy loss. logits = model(train_data_node, True) - + # t: observed noisy labels # q: estimated class probabilities (output from softmax) # z: argmax of q @@ -247,18 +248,18 @@ def model(data, train=False): z = tf.one_hot(qqq, NUM_LABELS) #cross_entropy = -tf.reduce_sum(t*tf.log(q),reduction_indices=1) cross_entropy = -tf.reduce_sum((BETA*t+(1-BETA)*z)*tf.log(q),reduction_indices=1) - + loss = tf.reduce_mean(cross_entropy) - + # loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits( # logits, train_labels_node)) - + # L2 regularization for the fully connected parameters. regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) + tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases)) # Add the regularization term to the loss. loss += 5e-4 * regularizers - + # Optimizer: set up a variable that's incremented once per batch and # controls the learning rate decay. batch = tf.Variable(0, dtype=data_type()) @@ -273,13 +274,13 @@ def model(data, train=False): optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(loss, global_step=batch) - + # Predictions for the current training minibatch. train_prediction = tf.nn.softmax(logits) - + # Predictions for the test and validation, which we'll compute less often. eval_prediction = tf.nn.softmax(model(eval_data)) - + # Small utility function to evaluate a dataset by feeding batches of data to # {eval_data} and pulling the results from {eval_predictions}. # Saves memory and enables this to run on smaller GPUs. @@ -301,7 +302,7 @@ def eval_in_batches(data, sess): feed_dict={eval_data: data[-EVAL_BATCH_SIZE:, ...]}) predictions[begin:, :] = batch_predictions[begin - size:, :] return predictions - + # Create a local session to run the training. start_time = time.time() with tf.Session() as sess: diff --git a/tmp/mtcnn.py b/tmp/mtcnn.py index 867fe0d9d..77c01f1c0 100644 --- a/tmp/mtcnn.py +++ b/tmp/mtcnn.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -24,12 +24,13 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import align.detect_face from scipy import misc with tf.Graph().as_default(): - + sess = tf.Session() with sess.as_default(): with tf.variable_scope('pnet'): @@ -44,7 +45,7 @@ data = tf.placeholder(tf.float32, (None,48,48,3), 'input') onet = align.detect_face.ONet({'data':data}) onet.load('../../data/det3.npy', sess) - + pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) diff --git a/tmp/mtcnn_test.py b/tmp/mtcnn_test.py index e02b11a5b..bf24ba030 100644 --- a/tmp/mtcnn_test.py +++ b/tmp/mtcnn_test.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -23,7 +23,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import align.detect_face @@ -37,14 +38,14 @@ np.random.seed(666) img = np.random.rand(1,3,150,150) img = np.transpose(img, (0,2,3,1)) - + np.set_printoptions(formatter={'float': '{: 0.4f}'.format}) - + # prob1=sess1.run('prob1:0', feed_dict={data:img}) # print(prob1[0,0,0,:]) # conv42=sess1.run('conv4-2/BiasAdd:0', feed_dict={data:img}) # print(conv42[0,0,0,:]) - + # conv42, prob1 = pnet_fun(img) # print(prob1[0,0,0,:]) # print(conv42[0,0,0,:]) @@ -52,10 +53,10 @@ # [ 0.9929 0.0071] prob1, caffe # [ 0.9929 0.0071] prob1, tensorflow - + # [ 0.1207 -0.0116 -0.1231 -0.0463] conv4-2, caffe # [ 0.1207 -0.0116 -0.1231 -0.0463] conv4-2, tensorflow - + g2 = tf.Graph() with g2.as_default(): @@ -67,22 +68,22 @@ np.random.seed(666) img = np.random.rand(73,3,24,24) img = np.transpose(img, (0,2,3,1)) - + # np.set_printoptions(formatter={'float': '{: 0.4f}'.format}) -# +# # prob1=sess2.run('prob1:0', feed_dict={data:img}) # print(prob1[0,:]) -# +# # conv52=sess2.run('conv5-2/conv5-2:0', feed_dict={data:img}) # print(conv52[0,:]) - + # [ 0.9945 0.0055] prob1, caffe # [ 0.1108 -0.0038 -0.1631 -0.0890] conv5-2, caffe - + # [ 0.9945 0.0055] prob1, tensorflow # [ 0.1108 -0.0038 -0.1631 -0.0890] conv5-2, tensorflow - + g3 = tf.Graph() with g3.as_default(): data = tf.placeholder(tf.float32, (None,48,48,3), 'input') @@ -93,17 +94,17 @@ np.random.seed(666) img = np.random.rand(11,3,48,48) img = np.transpose(img, (0,2,3,1)) - + # np.set_printoptions(formatter={'float': '{: 0.4f}'.format}) -# +# # prob1=sess3.run('prob1:0', feed_dict={data:img}) # print(prob1[0,:]) # print('prob1, tensorflow') -# +# # conv62=sess3.run('conv6-2/conv6-2:0', feed_dict={data:img}) # print(conv62[0,:]) # print('conv6-2, tensorflow') -# +# # conv63=sess3.run('conv6-3/conv6-3:0', feed_dict={data:img}) # print(conv63[0,:]) # print('conv6-3, tensorflow') @@ -111,7 +112,7 @@ # [ 0.9988 0.0012] prob1, caffe # [ 0.0446 -0.0968 -0.1091 -0.0212] conv6-2, caffe # [ 0.2429 0.6104 0.4074 0.3104 0.5939 0.2729 0.2132 0.5462 0.7863 0.7568] conv6-3, caffe - + # [ 0.9988 0.0012] prob1, tensorflow # [ 0.0446 -0.0968 -0.1091 -0.0212] conv6-2, tensorflow # [ 0.2429 0.6104 0.4074 0.3104 0.5939 0.2729 0.2132 0.5462 0.7863 0.7568] conv6-3, tensorflow diff --git a/tmp/mtcnn_test_pnet_dbg.py b/tmp/mtcnn_test_pnet_dbg.py index d4fdfbb6e..b1295bae0 100644 --- a/tmp/mtcnn_test_pnet_dbg.py +++ b/tmp/mtcnn_test_pnet_dbg.py @@ -2,7 +2,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import scipy.io as io import align.detect_face @@ -24,12 +25,12 @@ # data = tf.placeholder(tf.float32, (None,48,48,3), 'input') # onet = align.detect_face.ONet({'data':data}) # onet.load('../../data/det3.npy', sess) - + pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) # rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) # onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) - - + + ref = io.loadmat('pnet_dbg.mat') img_x = np.expand_dims(ref['im_data'], 0) @@ -41,14 +42,14 @@ #np.where(abs(out0[0,:,:,:]-ref['out0'])>1e-18) qqq3 = 
np.where(abs(out1[0,:,:,:]-ref['out1'])>1e-7) # 3390 diffs with softmax2 print(qqq3[0].shape) - + np.set_printoptions(formatter={'float': '{: 0.4f}'.format}) - + # prob1=sess1.run('prob1:0', feed_dict={data:img}) # print(prob1[0,0,0,:]) # conv42=sess1.run('conv4-2/BiasAdd:0', feed_dict={data:img}) # print(conv42[0,0,0,:]) - + # conv42, prob1 = pnet_fun(img) # print(prob1[0,0,0,:]) # print(conv42[0,0,0,:]) @@ -56,10 +57,10 @@ # [ 0.9929 0.0071] prob1, caffe # [ 0.9929 0.0071] prob1, tensorflow - + # [ 0.1207 -0.0116 -0.1231 -0.0463] conv4-2, caffe # [ 0.1207 -0.0116 -0.1231 -0.0463] conv4-2, tensorflow - + # g2 = tf.Graph() # with g2.as_default(): @@ -71,22 +72,22 @@ # np.random.seed(666) # img = np.random.rand(73,3,24,24) # img = np.transpose(img, (0,2,3,1)) - + # np.set_printoptions(formatter={'float': '{: 0.4f}'.format}) -# +# # prob1=sess2.run('prob1:0', feed_dict={data:img}) # print(prob1[0,:]) -# +# # conv52=sess2.run('conv5-2/conv5-2:0', feed_dict={data:img}) # print(conv52[0,:]) - + # [ 0.9945 0.0055] prob1, caffe # [ 0.1108 -0.0038 -0.1631 -0.0890] conv5-2, caffe - + # [ 0.9945 0.0055] prob1, tensorflow # [ 0.1108 -0.0038 -0.1631 -0.0890] conv5-2, tensorflow - + # g3 = tf.Graph() # with g3.as_default(): # data = tf.placeholder(tf.float32, (None,48,48,3), 'input') @@ -97,17 +98,17 @@ # np.random.seed(666) # img = np.random.rand(11,3,48,48) # img = np.transpose(img, (0,2,3,1)) - + # np.set_printoptions(formatter={'float': '{: 0.4f}'.format}) -# +# # prob1=sess3.run('prob1:0', feed_dict={data:img}) # print(prob1[0,:]) # print('prob1, tensorflow') -# +# # conv62=sess3.run('conv6-2/conv6-2:0', feed_dict={data:img}) # print(conv62[0,:]) # print('conv6-2, tensorflow') -# +# # conv63=sess3.run('conv6-3/conv6-3:0', feed_dict={data:img}) # print(conv63[0,:]) # print('conv6-3, tensorflow') @@ -115,7 +116,7 @@ # [ 0.9988 0.0012] prob1, caffe # [ 0.0446 -0.0968 -0.1091 -0.0212] conv6-2, caffe # [ 0.2429 0.6104 0.4074 0.3104 0.5939 0.2729 0.2132 0.5462 0.7863 0.7568] conv6-3, caffe - + # [ 0.9988 0.0012] prob1, tensorflow # [ 0.0446 -0.0968 -0.1091 -0.0212] conv6-2, tensorflow # [ 0.2429 0.6104 0.4074 0.3104 0.5939 0.2729 0.2132 0.5462 0.7863 0.7568] conv6-3, tensorflow diff --git a/tmp/network.py b/tmp/network.py index c375e43d6..70e92023b 100644 --- a/tmp/network.py +++ b/tmp/network.py @@ -1,19 +1,19 @@ """Functions for building the face recognition network. """ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -27,7 +27,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -39,7 +40,7 @@ def conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, name, phase_train=True, use_ initializer=tf.truncated_normal_initializer(stddev=1e-1), regularizer=l2_regularizer, dtype=inpOp.dtype) cnv = tf.nn.conv2d(inpOp, kernel, [1, dH, dW, 1], padding=padType) - + if use_batch_norm: conv_bn = batch_norm(cnv, phase_train) else: @@ -81,18 +82,18 @@ def lppool(inpOp, pnorm, kH, kW, dH, dW, padding, name): pwr = tf.square(inpOp) else: pwr = tf.pow(inpOp, pnorm) - + subsamp = tf.nn.avg_pool(pwr, ksize=[1, kH, kW, 1], strides=[1, dH, dW, 1], padding=padding) subsamp_sum = tf.multiply(subsamp, kH*kW) - + if pnorm == 2: out = tf.sqrt(subsamp_sum) else: out = tf.pow(subsamp_sum, 1/pnorm) - + return out def mpool(inpOp, kH, kW, dH, dW, padding, name): @@ -100,7 +101,7 @@ def mpool(inpOp, kH, kW, dH, dW, padding, name): maxpool = tf.nn.max_pool(inpOp, ksize=[1, kH, kW, 1], strides=[1, dH, dW, 1], - padding=padding) + padding=padding) return maxpool def apool(inpOp, kH, kW, dH, dW, padding, name): @@ -132,7 +133,7 @@ def batch_norm(x, phase_train): name=name+'/beta', trainable=True, dtype=x.dtype) gamma = tf.Variable(tf.constant(1.0, shape=[n_out], dtype=x.dtype), name=name+'/gamma', trainable=True, dtype=x.dtype) - + batch_mean, batch_var = tf.nn.moments(x, [0,1,2], name='moments') ema = tf.train.ExponentialMovingAverage(decay=0.9) def mean_var_with_update(): @@ -145,9 +146,9 @@ def mean_var_with_update(): normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3) return normed -def inception(inp, inSize, ks, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2, o4s3, poolType, name, +def inception(inp, inSize, ks, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2, o4s3, poolType, name, phase_train=True, use_batch_norm=True, weight_decay=0.0): - + print('name = ', name) print('inputSize = ', inSize) print('kernelSize = {3,5}') @@ -161,27 +162,27 @@ def inception(inp, inSize, ks, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2, o4s3, po o4 = inSize print('outputSize = ', o1s+o2s2+o3s2+o4) print() - + net = [] - + with tf.variable_scope(name): with tf.variable_scope('branch1_1x1'): if o1s>0: conv1 = conv(inp, inSize, o1s, 1, 1, 1, 1, 'SAME', 'conv1x1', phase_train=phase_train, use_batch_norm=use_batch_norm, weight_decay=weight_decay) net.append(conv1) - + with tf.variable_scope('branch2_3x3'): if o2s1>0: conv3a = conv(inp, inSize, o2s1, 1, 1, 1, 1, 'SAME', 'conv1x1', phase_train=phase_train, use_batch_norm=use_batch_norm, weight_decay=weight_decay) conv3 = conv(conv3a, o2s1, o2s2, 3, 3, ks, ks, 'SAME', 'conv3x3', phase_train=phase_train, use_batch_norm=use_batch_norm, weight_decay=weight_decay) net.append(conv3) - + with tf.variable_scope('branch3_5x5'): if o3s1>0: conv5a = conv(inp, inSize, o3s1, 1, 1, 1, 1, 'SAME', 'conv1x1', phase_train=phase_train, use_batch_norm=use_batch_norm, weight_decay=weight_decay) conv5 = conv(conv5a, o3s1, o3s2, 5, 5, ks, ks, 'SAME', 'conv5x5', phase_train=phase_train, use_batch_norm=use_batch_norm, weight_decay=weight_decay) net.append(conv5) - + with tf.variable_scope('branch4_pool'): if poolType=='MAX': pool = mpool(inp, o4s1, o4s1, o4s3, o4s3, 'SAME', 'pool') @@ -189,12 +190,12 @@ def inception(inp, inSize, ks, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2, o4s3, po pool = lppool(inp, 2, o4s1, o4s1, 
o4s3, o4s3, 'SAME', 'pool') else: raise ValueError('Invalid pooling type "%s"' % poolType) - + if o4s2>0: pool_conv = conv(pool, inSize, o4s2, 1, 1, 1, 1, 'SAME', 'conv1x1', phase_train=phase_train, use_batch_norm=use_batch_norm, weight_decay=weight_decay) else: pool_conv = pool net.append(pool_conv) - + incept = array_ops.concat(net, 3, name=name) return incept diff --git a/tmp/nn2.py b/tmp/nn2.py index 736265374..a6b67affc 100644 --- a/tmp/nn2.py +++ b/tmp/nn2.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -25,13 +25,14 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import models.network as network def inference(images, keep_probability, phase_train=True, weight_decay=0.0): - """ Define an inference network for face recognition based + """ Define an inference network for face recognition based on inception modules using batch normalization - + Args: images: The images to run inference on, dimensions batch_size x height x width x channels phase_train: True if batch normalization should operate in training mode @@ -47,14 +48,14 @@ def inference(images, keep_probability, phase_train=True, weight_decay=0.0): endpoints['conv3_3x3'] = net net = network.mpool(net, 3, 3, 2, 2, 'SAME', 'pool3') endpoints['pool3'] = net - + net = network.inception(net, 192, 1, 64, 96, 128, 16, 32, 3, 32, 1, 'MAX', 'incept3a', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept3a'] = net net = network.inception(net, 256, 1, 64, 96, 128, 32, 64, 3, 64, 1, 'MAX', 'incept3b', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept3b'] = net net = network.inception(net, 320, 2, 0, 128, 256, 32, 64, 3, 0, 2, 'MAX', 'incept3c', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept3c'] = net - + net = network.inception(net, 640, 1, 256, 96, 192, 32, 64, 3, 128, 1, 'MAX', 'incept4a', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept4a'] = net net = network.inception(net, 640, 1, 224, 112, 224, 32, 64, 3, 128, 1, 'MAX', 'incept4b', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) @@ -65,7 +66,7 @@ def inference(images, keep_probability, phase_train=True, weight_decay=0.0): endpoints['incept4d'] = net net = network.inception(net, 640, 2, 0, 160, 256, 64, 128, 3, 0, 2, 'MAX', 'incept4e', phase_train=phase_train, use_batch_norm=True) endpoints['incept4e'] = net - + net = network.inception(net, 1024, 1, 384, 192, 384, 48, 128, 3, 128, 1, 'MAX', 'incept5a', phase_train=phase_train, use_batch_norm=True, 
weight_decay=weight_decay) endpoints['incept5a'] = net net = network.inception(net, 1024, 1, 384, 192, 384, 48, 128, 3, 128, 1, 'MAX', 'incept5b', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) @@ -76,5 +77,5 @@ def inference(images, keep_probability, phase_train=True, weight_decay=0.0): endpoints['prelogits'] = net net = tf.nn.dropout(net, keep_probability) endpoints['dropout'] = net - + return net, endpoints diff --git a/tmp/nn3.py b/tmp/nn3.py index 2e0502c8f..dce011192 100644 --- a/tmp/nn3.py +++ b/tmp/nn3.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -25,13 +25,14 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import models.network as network def inference(images, keep_probability, phase_train=True, weight_decay=0.0): - """ Define an inference network for face recognition based + """ Define an inference network for face recognition based on inception modules using batch normalization - + Args: images: The images to run inference on, dimensions batch_size x height x width x channels phase_train: True if batch normalization should operate in training mode @@ -47,14 +48,14 @@ def inference(images, keep_probability, phase_train=True, weight_decay=0.0): endpoints['conv3_3x3'] = net net = network.mpool(net, 3, 3, 2, 2, 'SAME', 'pool3') endpoints['pool3'] = net - + net = network.inception(net, 192, 1, 64, 96, 128, 16, 32, 3, 32, 1, 'MAX', 'incept3a', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept3a'] = net net = network.inception(net, 256, 1, 64, 96, 128, 32, 64, 3, 64, 1, 'MAX', 'incept3b', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept3b'] = net net = network.inception(net, 320, 2, 0, 128, 256, 32, 64, 3, 0, 2, 'MAX', 'incept3c', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept3c'] = net - + net = network.inception(net, 640, 1, 256, 96, 192, 32, 64, 3, 128, 1, 'MAX', 'incept4a', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept4a'] = net net = network.inception(net, 640, 1, 224, 112, 224, 32, 64, 3, 128, 1, 'MAX', 'incept4b', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) @@ -65,7 +66,7 @@ def inference(images, keep_probability, phase_train=True, weight_decay=0.0): endpoints['incept4d'] = net net = network.inception(net, 640, 2, 0, 160, 256, 64, 128, 3, 0, 2, 'MAX', 'incept4e', phase_train=phase_train, use_batch_norm=True) endpoints['incept4e'] = net - + net = network.inception(net, 1024, 1, 384, 192, 384, 48, 128, 3, 
128, 1, 'MAX', 'incept5a', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept5a'] = net net = network.inception(net, 1024, 1, 384, 192, 384, 48, 128, 3, 128, 1, 'MAX', 'incept5b', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) @@ -76,5 +77,5 @@ def inference(images, keep_probability, phase_train=True, weight_decay=0.0): endpoints['prelogits'] = net net = tf.nn.dropout(net, keep_probability) endpoints['dropout'] = net - + return net, endpoints diff --git a/tmp/nn4.py b/tmp/nn4.py index 8c3c79fd0..c432c2484 100644 --- a/tmp/nn4.py +++ b/tmp/nn4.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -25,13 +25,14 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import models.network as network def inference(images, keep_probability, phase_train=True, weight_decay=0.0): - """ Define an inference network for face recognition based + """ Define an inference network for face recognition based on inception modules using batch normalization - + Args: images: The images to run inference on, dimensions batch_size x height x width x channels phase_train: True if batch normalization should operate in training mode @@ -47,14 +48,14 @@ def inference(images, keep_probability, phase_train=True, weight_decay=0.0): endpoints['conv3_3x3'] = net net = network.mpool(net, 3, 3, 2, 2, 'SAME', 'pool3') endpoints['pool3'] = net - + net = network.inception(net, 192, 1, 64, 96, 128, 16, 32, 3, 32, 1, 'MAX', 'incept3a', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept3a'] = net net = network.inception(net, 256, 1, 64, 96, 128, 32, 64, 3, 64, 1, 'MAX', 'incept3b', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept3b'] = net net = network.inception(net, 320, 2, 0, 128, 256, 32, 64, 3, 0, 2, 'MAX', 'incept3c', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept3c'] = net - + net = network.inception(net, 640, 1, 256, 96, 192, 32, 64, 3, 128, 1, 'MAX', 'incept4a', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept4a'] = net net = network.inception(net, 640, 1, 224, 112, 224, 32, 64, 3, 128, 1, 'MAX', 'incept4b', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) @@ -65,7 +66,7 @@ def inference(images, keep_probability, phase_train=True, weight_decay=0.0): endpoints['incept4d'] = net net = network.inception(net, 640, 2, 0, 160, 256, 64, 128, 3, 0, 2, 'MAX', 'incept4e', phase_train=phase_train, use_batch_norm=True) endpoints['incept4e'] = 
net - + net = network.inception(net, 1024, 1, 384, 192, 384, 0, 0, 3, 128, 1, 'MAX', 'incept5a', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept5a'] = net net = network.inception(net, 896, 1, 384, 192, 384, 0, 0, 3, 128, 1, 'MAX', 'incept5b', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) @@ -76,5 +77,5 @@ def inference(images, keep_probability, phase_train=True, weight_decay=0.0): endpoints['prelogits'] = net net = tf.nn.dropout(net, keep_probability) endpoints['dropout'] = net - + return net, endpoints diff --git a/tmp/nn4_small2_v1.py b/tmp/nn4_small2_v1.py index 780aafe20..363145f6d 100644 --- a/tmp/nn4_small2_v1.py +++ b/tmp/nn4_small2_v1.py @@ -1,17 +1,17 @@ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -25,13 +25,14 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import models.network as network def inference(images, keep_probability, phase_train=True, weight_decay=0.0): - """ Define an inference network for face recognition based + """ Define an inference network for face recognition based on inception modules using batch normalization - + Args: images: The images to run inference on, dimensions batch_size x height x width x channels phase_train: True if batch normalization should operate in training mode @@ -47,19 +48,19 @@ def inference(images, keep_probability, phase_train=True, weight_decay=0.0): endpoints['conv3_3x3'] = net net = network.mpool(net, 3, 3, 2, 2, 'SAME', 'pool3') endpoints['pool3'] = net - + net = network.inception(net, 192, 1, 64, 96, 128, 16, 32, 3, 32, 1, 'MAX', 'incept3a', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept3a'] = net net = network.inception(net, 256, 1, 64, 96, 128, 32, 64, 3, 64, 1, 'MAX', 'incept3b', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept3b'] = net net = network.inception(net, 320, 2, 0, 128, 256, 32, 64, 3, 0, 2, 'MAX', 'incept3c', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept3c'] = net - + net = network.inception(net, 640, 1, 256, 96, 192, 32, 64, 3, 128, 1, 'MAX', 'incept4a', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept4a'] = net net = network.inception(net, 640, 2, 0, 160, 256, 64, 128, 3, 0, 2, 'MAX', 'incept4e', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) endpoints['incept4e'] = net - + net = network.inception(net, 1024, 1, 256, 96, 384, 0, 0, 3, 96, 1, 'MAX', 'incept5a', phase_train=phase_train, use_batch_norm=True, 
weight_decay=weight_decay) endpoints['incept5a'] = net net = network.inception(net, 736, 1, 256, 96, 384, 0, 0, 3, 96, 1, 'MAX', 'incept5b', phase_train=phase_train, use_batch_norm=True, weight_decay=weight_decay) @@ -70,6 +71,5 @@ def inference(images, keep_probability, phase_train=True, weight_decay=0.0): endpoints['prelogits'] = net net = tf.nn.dropout(net, keep_probability) endpoints['dropout'] = net - + return net, endpoints - \ No newline at end of file diff --git a/tmp/random_test.py b/tmp/random_test.py index b186cc3b1..43920b62d 100644 --- a/tmp/random_test.py +++ b/tmp/random_test.py @@ -1,4 +1,5 @@ -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np from six.moves import xrange @@ -9,21 +10,21 @@ # Placeholder for input images input_placeholder = tf.placeholder(tf.float32, shape=(9, 7), name='input') - + # Split example embeddings into anchor, positive and negative #anchor, positive, negative = tf.split(0, 3, input) resh1 = tf.reshape(input_placeholder, [3,3,7]) anchor = resh1[0,:,:] positive = resh1[1,:,:] negative = resh1[2,:,:] - + # Build an initialization operation to run below. init = tf.global_variables_initializer() # Start running operations on the Graph. sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) sess.run(init) - + with sess.as_default(): batch = np.zeros((9,7)) batch[0,:] = 1.1 @@ -49,7 +50,7 @@ #for gt, gv in zip(grads_eval, grad_vars): #print('%40s: %.20f' % (gv.op.name, np.sum(gt))) - + #import h5py #myFile = h5py.File('/home/david/repo/TensorFace/network.h5', 'r') @@ -69,7 +70,8 @@ #print item -#import tensorflow as tf +#import tensorflow.compat.v1 as tf +#tf.disable_v2_behavior() #import numpy as np #import matplotlib.pyplot as plt #import math diff --git a/tmp/seed_test.py b/tmp/seed_test.py index 2077cf5ee..66aecb740 100644 --- a/tmp/seed_test.py +++ b/tmp/seed_test.py @@ -1,4 +1,5 @@ -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import sys import time @@ -23,19 +24,19 @@ FLAGS = tf.app.flags.FLAGS def run_train(): - + with tf.Graph().as_default(): - + # Set the seed for the graph tf.set_random_seed(666) # Placeholder for input images images_placeholder = tf.placeholder(tf.float32, shape=(FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size, 3), name='input') - + # Build the inference graph embeddings = inference_conv_test(images_placeholder) #embeddings = inference_affine_test(images_placeholder) - + # Split example embeddings into anchor, positive and negative anchor, positive, negative = tf.split(0, 3, embeddings) @@ -45,7 +46,7 @@ def run_train(): #anchor = resh1[0,:,:] #positive = resh1[1,:,:] #negative = resh1[2,:,:] - + # Calculate triplet loss pos_dist = tf.reduce_sum(tf.square(tf.sub(anchor, positive)), 1) neg_dist = tf.reduce_sum(tf.square(tf.sub(anchor, negative)), 1) @@ -57,17 +58,17 @@ def run_train(): #opt = tf.train.AdagradOptimizer(FLAGS.learning_rate) # Optimizer does not seem to matter grads = opt.compute_gradients(loss) train_op = opt.apply_gradients(grads) - + # Initialize the variables init = tf.global_variables_initializer() - + # Launch the graph.
sess = tf.Session() sess.run(init) # Set the numpy seed np.random.seed(666) - + with sess.as_default(): grads_eval = [] all_vars = [] @@ -83,7 +84,7 @@ def run_train(): grads_eval += sess.run(grad_tensors, feed_dict=feed_dict) # Run training sess.run(train_op, feed_dict=feed_dict) - + sess.close() return (var_names, all_vars, grad_vars, grads_eval) @@ -92,7 +93,7 @@ def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType): dtype=tf.float32, stddev=1e-1), name='weights') conv = tf.nn.conv2d(inpOp, kernel, [1, dH, dW, 1], padding=padType) - + biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32), trainable=True, name='biases') bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape()) @@ -107,7 +108,7 @@ def _affine(inpOp, nIn, nOut): trainable=True, name='biases') affine1 = tf.nn.relu_layer(inpOp, kernel, biases) return affine1 - + def inference_conv_test(images): conv1 = _conv(images, 3, 64, 7, 7, 2, 2, 'SAME') resh1 = tf.reshape(conv1, [-1, 147456]) @@ -130,7 +131,7 @@ def inference_affine_test(images): for i in range(len(all_vars1)): all_vars_close[i] = np.allclose(all_vars1[i], all_vars2[i], rtol=1.e-16) print('%d var %s: %s' % (i, var_names1[i].op.name, all_vars_close[i])) - + all_grads_close = [None] * len(all_grads1) for i in range(len(all_grads1)): all_grads_close[i] = np.allclose(all_grads1[i], all_grads2[i], rtol=1.e-16) diff --git a/tmp/select_triplets_test.py b/tmp/select_triplets_test.py index 149e262b3..0dab06b02 100644 --- a/tmp/select_triplets_test.py +++ b/tmp/select_triplets_test.py @@ -1,6 +1,7 @@ import facenet import numpy as np -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() FLAGS = tf.app.flags.FLAGS diff --git a/tmp/test_invariance_on_lfw.py b/tmp/test_invariance_on_lfw.py index 3bbbde00a..19d10e4af 100644 --- a/tmp/test_invariance_on_lfw.py +++ b/tmp/test_invariance_on_lfw.py @@ -2,19 +2,19 @@ This requires test images to be cropped a bit wider than the normal to give some room for the transformations. """ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -27,7 +27,8 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import numpy as np import argparse import facenet @@ -39,39 +40,39 @@ import math def main(args): - + pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) result_dir = '../data/' plt.ioff() # Disable interactive plotting mode - + with tf.Graph().as_default(): with tf.Session() as sess: - + # Load the model print('Loading model "%s"' % args.model_file) facenet.load_model(args.model_file) - + # Get input and output tensors images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") image_size = int(images_placeholder.get_shape()[1]) - + # Run test on LFW to check accuracy for different horizontal/vertical translations of input images if args.nrof_offsets>0: step = 3 offsets = np.asarray([x*step for x in range(-args.nrof_offsets//2+1, args.nrof_offsets//2+1)]) horizontal_offset_accuracy = [None] * len(offsets) for idx, offset in enumerate(offsets): - accuracy = evaluate_accuracy(sess, images_placeholder, phase_train_placeholder, image_size, embeddings, + accuracy = evaluate_accuracy(sess, images_placeholder, phase_train_placeholder, image_size, embeddings, paths, actual_issame, translate_images, (offset,0), 60, args.orig_image_size, args.seed) print('Hoffset: %1.3f Accuracy: %1.3f+-%1.3f' % (offset, np.mean(accuracy), np.std(accuracy))) horizontal_offset_accuracy[idx] = np.mean(accuracy) vertical_offset_accuracy = [None] * len(offsets) for idx, offset in enumerate(offsets): - accuracy = evaluate_accuracy(sess, images_placeholder, phase_train_placeholder, image_size, embeddings, + accuracy = evaluate_accuracy(sess, images_placeholder, phase_train_placeholder, image_size, embeddings, paths, actual_issame, translate_images, (0,offset), 60, args.orig_image_size, args.seed) print('Voffset: %1.3f Accuracy: %1.3f+-%1.3f' % (offset, np.mean(accuracy), np.std(accuracy))) vertical_offset_accuracy[idx] = np.mean(accuracy) @@ -95,7 +96,7 @@ def main(args): angles = np.asarray([x*step for x in range(-args.nrof_offsets//2+1, args.nrof_offsets//2+1)]) rotation_accuracy = [None] * len(angles) for idx, angle in enumerate(angles): - accuracy = evaluate_accuracy(sess, images_placeholder, phase_train_placeholder, image_size, embeddings, + accuracy = evaluate_accuracy(sess, images_placeholder, phase_train_placeholder, image_size, embeddings, paths, actual_issame, rotate_images, angle, 60, args.orig_image_size, args.seed) print('Angle: %1.3f Accuracy: %1.3f+-%1.3f' % (angle, np.mean(accuracy), np.std(accuracy))) rotation_accuracy[idx] = np.mean(accuracy) @@ -116,7 +117,7 @@ def main(args): scales = np.asarray([x*step+1 for x in range(-args.nrof_offsets//2+1, args.nrof_offsets//2+1)]) scale_accuracy = [None] * len(scales) for scale_idx, scale in enumerate(scales): - accuracy = evaluate_accuracy(sess, images_placeholder, phase_train_placeholder, image_size, embeddings, + accuracy = evaluate_accuracy(sess, images_placeholder, phase_train_placeholder, image_size, embeddings, paths, actual_issame, scale_images, scale, 60, args.orig_image_size, args.seed) print('Scale: %1.3f Accuracy: %1.3f+-%1.3f' % (scale, np.mean(accuracy), np.std(accuracy))) scale_accuracy[scale_idx] = 
np.mean(accuracy) @@ -130,13 +131,13 @@ def main(args): print('Saving results in %s' % result_dir) fig.savefig(os.path.join(result_dir, 'invariance_scale.png')) save_result(scales, scale_accuracy, os.path.join(result_dir, 'invariance_scale.txt')) - + def save_result(aug, acc, filename): with open(filename, "w") as f: for i in range(aug.size): f.write('%6.4f %6.4f\n' % (aug[i], acc[i])) - -def evaluate_accuracy(sess, images_placeholder, phase_train_placeholder, image_size, embeddings, + +def evaluate_accuracy(sess, images_placeholder, phase_train_placeholder, image_size, embeddings, paths, actual_issame, augment_images, aug_value, batch_size, orig_image_size, seed): nrof_images = len(paths) nrof_batches = int(math.ceil(1.0*nrof_images / batch_size)) @@ -150,7 +151,7 @@ def evaluate_accuracy(sess, images_placeholder, phase_train_placeholder, image_s feed_dict = { images_placeholder: images_aug, phase_train_placeholder: False } emb_list += sess.run([embeddings], feed_dict=feed_dict) emb_array = np.vstack(emb_list) # Stack the embeddings to a nrof_examples_per_epoch x 128 matrix - + thresholds = np.arange(0, 4, 0.01) embeddings1 = emb_array[0::2] embeddings2 = emb_array[1::2] @@ -186,9 +187,9 @@ def translate_images(images, offset, image_size): def parse_arguments(argv): parser = argparse.ArgumentParser() - - parser.add_argument('--model_file', type=str, - help='File containing the model parameters as well as the model metagraph (with extension ".meta")', + + parser.add_argument('--model_file', type=str, + help='File containing the model parameters as well as the model metagraph (with extension ".meta")', default='~/models/facenet/20160514-234418/model.ckpt-500000') parser.add_argument('--nrof_offsets', type=int, help='Number of horizontal and vertical offsets to evaluate.', default=21) diff --git a/tmp/vggface16.py b/tmp/vggface16.py index df45c53ec..cfe0bca88 100644 --- a/tmp/vggface16.py +++ b/tmp/vggface16.py @@ -4,39 +4,40 @@ """ import numpy as np from scipy import io -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() def load(filename, images): #filename = '../data/vgg_face_matconvnet/data/vgg_face.mat' vgg16 = io.loadmat(filename) vgg16Layers = vgg16['net'][0][0]['layers'] - + # A function to get the weights of the VGG layers def vbbWeights(layerNumber): W = vgg16Layers[0][layerNumber][0][0][2][0][0] W = tf.constant(W) return W - + def vbbConstants(layerNumber): b = vgg16Layers[0][layerNumber][0][0][2][0][1].T b = tf.constant(np.reshape(b, (b.size))) return b - + modelGraph = {} modelGraph['input'] = images - + modelGraph['conv1_1'] = tf.nn.conv2d(modelGraph['input'], filter = vbbWeights(0), strides = [1, 1, 1, 1], padding = 'SAME') modelGraph['relu1_1'] = tf.nn.relu(modelGraph['conv1_1'] + vbbConstants(0)) modelGraph['conv1_2'] = tf.nn.conv2d(modelGraph['relu1_1'], filter = vbbWeights(2), strides = [1, 1, 1, 1], padding = 'SAME') modelGraph['relu1_2'] = tf.nn.relu(modelGraph['conv1_2'] + vbbConstants(2)) modelGraph['pool1'] = tf.nn.max_pool(modelGraph['relu1_2'], ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME') - + modelGraph['conv2_1'] = tf.nn.conv2d(modelGraph['pool1'], filter = vbbWeights(5), strides = [1, 1, 1, 1], padding = 'SAME') modelGraph['relu2_1'] = tf.nn.relu(modelGraph['conv2_1'] + vbbConstants(5)) modelGraph['conv2_2'] = tf.nn.conv2d(modelGraph['relu2_1'], filter = vbbWeights(7), strides = [1, 1, 1, 1], padding = 'SAME') modelGraph['relu2_2'] = tf.nn.relu(modelGraph['conv2_2'] + vbbConstants(7)) modelGraph['pool2'] = 
tf.nn.max_pool(modelGraph['relu2_2'], ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME') - + modelGraph['conv3_1'] = tf.nn.conv2d(modelGraph['pool2'], filter = vbbWeights(10), strides = [1, 1, 1, 1], padding = 'SAME') modelGraph['relu3_1'] = tf.nn.relu(modelGraph['conv3_1'] + vbbConstants(10)) modelGraph['conv3_2'] = tf.nn.conv2d(modelGraph['relu3_1'], filter = vbbWeights(12), strides = [1, 1, 1, 1], padding = 'SAME') @@ -44,7 +45,7 @@ def vbbConstants(layerNumber): modelGraph['conv3_3'] = tf.nn.conv2d(modelGraph['relu3_2'], filter = vbbWeights(14), strides = [1, 1, 1, 1], padding = 'SAME') modelGraph['relu3_3'] = tf.nn.relu(modelGraph['conv3_3'] + vbbConstants(14)) modelGraph['pool3'] = tf.nn.max_pool(modelGraph['relu3_3'], ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME') - + modelGraph['conv4_1'] = tf.nn.conv2d(modelGraph['pool3'], filter = vbbWeights(17), strides = [1, 1, 1, 1], padding = 'SAME') modelGraph['relu4_1'] = tf.nn.relu(modelGraph['conv4_1'] + vbbConstants(17)) modelGraph['conv4_2'] = tf.nn.conv2d(modelGraph['relu4_1'], filter = vbbWeights(19), strides = [1, 1, 1, 1], padding = 'SAME') @@ -52,7 +53,7 @@ def vbbConstants(layerNumber): modelGraph['conv4_3'] = tf.nn.conv2d(modelGraph['relu4_2'], filter = vbbWeights(21), strides = [1, 1, 1, 1], padding = 'SAME') modelGraph['relu4_3'] = tf.nn.relu(modelGraph['conv4_3'] + vbbConstants(21)) modelGraph['pool4'] = tf.nn.max_pool(modelGraph['relu4_3'], ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME') - + modelGraph['conv5_1'] = tf.nn.conv2d(modelGraph['pool4'], filter = vbbWeights(24), strides = [1, 1, 1, 1], padding = 'SAME') modelGraph['relu5_1'] = tf.nn.relu(modelGraph['conv5_1'] + vbbConstants(24)) modelGraph['conv5_2'] = tf.nn.conv2d(modelGraph['relu5_1'], filter = vbbWeights(26), strides = [1, 1, 1, 1], padding = 'SAME') @@ -60,7 +61,7 @@ def vbbConstants(layerNumber): modelGraph['conv5_3'] = tf.nn.conv2d(modelGraph['relu5_2'], filter = vbbWeights(28), strides = [1, 1, 1, 1], padding = 'SAME') modelGraph['relu5_3'] = tf.nn.relu(modelGraph['conv5_3'] + vbbConstants(28)) modelGraph['pool5'] = tf.nn.max_pool(modelGraph['relu5_3'], ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME') - + modelGraph['resh1'] = tf.reshape(modelGraph['pool5'], [-1, 25088]) modelGraph['fc6'] = tf.nn.relu_layer(modelGraph['resh1'], tf.reshape(vbbWeights(31), [25088, 4096]), vbbConstants(31)) modelGraph['dropout1'] = tf.nn.dropout(modelGraph['fc6'], 0.5) diff --git a/tmp/vggverydeep19.py b/tmp/vggverydeep19.py index 86f22c561..f84180d8b 100644 --- a/tmp/vggverydeep19.py +++ b/tmp/vggverydeep19.py @@ -4,23 +4,24 @@ """ import numpy as np from scipy import io -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() def load(filename, images): vgg19 = io.loadmat(filename) vgg19Layers = vgg19['layers'] - + # A function to get the weights of the VGG layers def vbbWeights(layerNumber): W = vgg19Layers[0][layerNumber][0][0][2][0][0] W = tf.constant(W) return W - + def vbbConstants(layerNumber): b = vgg19Layers[0][layerNumber][0][0][2][0][1].T b = tf.constant(np.reshape(b, (b.size))) return b - + modelGraph = {} modelGraph['input'] = images modelGraph['conv1_1'] = tf.nn.relu(tf.nn.conv2d(modelGraph['input'], filter = vbbWeights(0), strides = [1, 1, 1, 1], padding = 'SAME') + vbbConstants(0)) @@ -44,6 +45,6 @@ def vbbConstants(layerNumber): modelGraph['conv5_3'] = tf.nn.relu(tf.nn.conv2d(modelGraph['conv5_2'], filter = vbbWeights(32), strides = [1, 1, 1, 1], padding = 'SAME') + 
vbbConstants(32)) modelGraph['conv5_4'] = tf.nn.relu(tf.nn.conv2d(modelGraph['conv5_3'], filter = vbbWeights(34), strides = [1, 1, 1, 1], padding = 'SAME') + vbbConstants(34)) modelGraph['avgpool5'] = tf.nn.avg_pool(modelGraph['conv5_4'], ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME') - + return modelGraph diff --git a/tmp/visualize.py b/tmp/visualize.py index 6e5ea6877..f790168be 100644 --- a/tmp/visualize.py +++ b/tmp/visualize.py @@ -2,19 +2,19 @@ Based on http://nbviewer.jupyter.org/github/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/deepdream/deepdream.ipynb """ # MIT License -# +# # Copyright (c) 2016 David Sandberg -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -31,42 +31,43 @@ import numpy as np import sys import argparse -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import importlib from scipy import misc def main(args): - + network = importlib.import_module(args.model_def, 'inference') # Start with a gray image with a little noise np.random.seed(seed=args.seed) img_noise = np.random.uniform(size=(args.image_size,args.image_size,3)) + 100.0 - + sess = tf.Session() - + t_input = tf.placeholder(np.float32, shape=(args.image_size,args.image_size,3), name='input') # define the input tensor image_mean = 117.0 t_preprocessed = tf.expand_dims(t_input-image_mean, 0) - + # Build the inference graph - network.inference(t_preprocessed, 1.0, + network.inference(t_preprocessed, 1.0, phase_train=True, weight_decay=0.0) - + # Create a saver for restoring variables saver = tf.train.Saver(tf.global_variables()) - + # Restore the parameters saver.restore(sess, args.model_file) - + layers = [op.name for op in tf.get_default_graph().get_operations() if op.type=='Conv2D'] feature_nums = {layer: int(T(layer).get_shape()[-1]) for layer in layers} - + print('Number of layers: %d' % len(layers)) - + for layer in sorted(feature_nums.keys()): print('%s%d' % ((layer+': ').ljust(40), feature_nums[layer])) - + # Picking some internal layer. Note that we use outputs before applying the ReLU nonlinearity # to have non-zero gradients for features with negative initial activations. 
layer = 'InceptionResnetV1/Repeat_2/block8_3/Conv2d_1x1/Conv2D' @@ -81,7 +82,7 @@ def main(args): img = render_naive(sess, t_input, T(layer)[:,:,:,channel], img_noise) filename = '%s_%03d.png' % (layer.replace('/', '_'), channel) misc.imsave(os.path.join(result_dir, filename), img) - + def T(layer): '''Helper for getting layer output tensor''' @@ -94,21 +95,21 @@ def visstd(a, s=0.1): def render_naive(sess, t_input, t_obj, img0, iter_n=20, step=1.0): t_score = tf.reduce_mean(t_obj) # defining the optimization objective t_grad = tf.gradients(t_score, t_input)[0] # behold the power of automatic differentiation! - + img = img0.copy() for _ in range(iter_n): g, _ = sess.run([t_grad, t_score], {t_input:img}) - # normalizing the gradient, so the same step size should work + # normalizing the gradient, so the same step size should work g /= g.std()+1e-8 # for different layers and networks img += g*step return visstd(img) def parse_arguments(argv): parser = argparse.ArgumentParser() - - parser.add_argument('model_file', type=str, + + parser.add_argument('model_file', type=str, help='Directory containing the graph definition and checkpoint files.') - parser.add_argument('--model_def', type=str, + parser.add_argument('--model_def', type=str, help='Model definition. Points to a module containing the definition of the inference graph.', default='models.nn4') parser.add_argument('--image_size', type=int, diff --git a/tmp/visualize_vgg_model.py b/tmp/visualize_vgg_model.py index 946893688..d37ec8267 100644 --- a/tmp/visualize_vgg_model.py +++ b/tmp/visualize_vgg_model.py @@ -1,6 +1,7 @@ import numpy as np from scipy import misc -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() from matplotlib import pyplot, image import vggverydeep19 @@ -13,7 +14,7 @@ outputWidth = 800 outputHeight = 600 -# Beta constant +# Beta constant beta = 5 # Alpha constant alpha = 100 @@ -34,7 +35,7 @@ def sqErrorLossContent(sess, modelGraph, layer): N = p.shape[3] M = p.shape[1] * p.shape[2] return (1 / (4 * N * M)) * tf.reduce_sum(tf.pow(modelGraph[layer] - sess.run(modelGraph[layer]), 2)) - + # Squared-error loss of style between the two feature representations styleLayers = [ ('conv1_1', 0.2), @@ -55,7 +56,7 @@ def intermediateCalc(x, y): return sum([W[layerNumber] * E[layerNumber] for layerNumber in range(len(styleLayers))]) session = tf.InteractiveSession() - + # Addition of extra dimension to image inputImage = np.reshape(inputImage, ((1,) + inputImage.shape)) inputImage = inputImage - meanImage19 @@ -89,17 +90,17 @@ def intermediateCalc(x, y): iterations = 2000 session.run(tf.global_variables_initializer()) session.run(nodes['input'].assign(inputImage)) - + for iters in range(iterations): session.run(trainStep) if iters%50 == 0: - # Output every 50 iterations for animation + # Output every 50 iterations for animation filename = 'output%d.png' % (iters) im = mixedImage + meanImage19 im = im[0] im = np.clip(im, 0, 255).astype('uint8') misc.imsave(filename, im) - + im = mixedImage + meanImage19 im = im[0] im = np.clip(im, 0, 255).astype('uint8') diff --git a/tmp/visualize_vggface.py b/tmp/visualize_vggface.py index c34004cdd..9d4c2e45b 100644 --- a/tmp/visualize_vggface.py +++ b/tmp/visualize_vggface.py @@ -1,19 +1,20 @@ import numpy as np -import tensorflow as tf +import tensorflow.compat.v1 as tf +tf.disable_v2_behavior() import matplotlib.pyplot as plt import tmp.vggface16 def main(): - + sess = tf.Session() - + t_input = tf.placeholder(np.float32, name='input') # define the input 
tensor image_mean = 117.0 t_preprocessed = tf.expand_dims(t_input-image_mean, 0) - + # Build the inference graph nodes = tmp.vggface16.load('data/vgg_face.mat', t_preprocessed) - + img_noise = np.random.uniform(size=(224,224,3)) + 117.0 # Picking some internal layer. Note that we use outputs before applying the ReLU nonlinearity @@ -27,7 +28,7 @@ def showarray(a): a = np.uint8(np.clip(a, 0, 1)*255) plt.imshow(a) plt.show() - + def visstd(a, s=0.1): '''Normalize the image range for visualization''' return (a-a.mean())/max(a.std(), 1e-4)*s + 0.5 @@ -35,15 +36,15 @@ def visstd(a, s=0.1): def render_naive(sess, t_input, t_obj, img0, iter_n=20, step=1.0): t_score = tf.reduce_mean(t_obj) # defining the optimization objective t_grad = tf.gradients(t_score, t_input)[0] # behold the power of automatic differentiation! - + img = img0.copy() for _ in range(iter_n): g, _ = sess.run([t_grad, t_score], {t_input:img}) - # normalizing the gradient, so the same step size should work + # normalizing the gradient, so the same step size should work g /= g.std()+1e-8 # for different layers and networks img += g*step return visstd(img) - + if __name__ == '__main__': main()
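
Note on the migration pattern: every file touched by this patch applies the same two-line substitution, swapping `import tensorflow as tf` for the `tensorflow.compat.v1` shim and calling `tf.disable_v2_behavior()` immediately afterwards. This restores TF1 graph-mode semantics (placeholders, sessions, collection-based variable initialization) on a TensorFlow 2.x installation, and the call is a global, idempotent switch, so repeating it in every module is harmless. A minimal sketch of the idiom follows; the toy graph is illustrative only and not taken from this repository:

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # global switch back to graph mode; safe to call repeatedly

# Hypothetical placeholder graph, just to show TF1-style code running unchanged.
x = tf.placeholder(tf.float32, shape=(None, 3), name='input')
y = tf.reduce_sum(tf.square(x), axis=1)  # built as a graph op, not executed eagerly

with tf.Session() as sess:
    print(sess.run(y, feed_dict={x: np.ones((2, 3))}))  # prints [3. 3.]

One caveat: the shim keeps deprecated-but-aliased symbols such as tf.placeholder and tf.Session available, but it does not resurrect ops renamed before TF 1.0 (for example tf.sub, which still appears as an untouched context line in tmp/seed_test.py), so code relying on those would still fail under the shim and needs a separate fix.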