From 9307f1dd775cd5e31fa351ce461537c9b61ccc92 Mon Sep 17 00:00:00 2001
From: mrlzla
Date: Mon, 12 Mar 2018 10:56:15 +0300
Subject: [PATCH 1/2] Make PSPNet Fully-convolutional
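
Making the network fully convolutional: the pyramid levels no longer rely on
AveragePooling2D kernels looked up from a fixed kernel_strides_map for
473x473 or 713x713 inputs. Each level now uses the new AdaptivePooling2D
layer, which always yields a level x level grid of bins whatever the spatial
size of the incoming feature map, and the model is built on
Input((None, None, 3)). A minimal sketch of the new layer's behaviour
(illustrative only, not code from this patch; feature_map is a placeholder):

    from keras.layers import Input
    from nnet.keras_layers import AdaptivePooling2D

    feature_map = Input((None, None, 512))            # any H and W
    bins = AdaptivePooling2D(6, mode='avg')(feature_map)
    # bins always has spatial shape 6 x 6, regardless of H and W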
---
 .gitignore                         |   2 +
 nnet/keras_layers.py               |  53 +++++++++++++++
 layers_builder.py => nnet/model.py | 102 +++++++----------------------
 nnet/tf_layers.py                  |  61 +++++++++++++++++
 pspnet.py                          |  13 ++--
 5 files changed, 147 insertions(+), 84 deletions(-)
 create mode 100644 nnet/keras_layers.py
 rename layers_builder.py => nnet/model.py (69%)
 create mode 100644 nnet/tf_layers.py

diff --git a/.gitignore b/.gitignore
index 23aa957..c888d1d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@
 *.prototxt
 *.h5
 *.json
+*.jpg
+*.log
 output/
diff --git a/nnet/keras_layers.py b/nnet/keras_layers.py
new file mode 100644
index 0000000..875f5d6
--- /dev/null
+++ b/nnet/keras_layers.py
@@ -0,0 +1,53 @@
+from keras import layers
+from .tf_layers import adaptive_pooling_2d
+from keras.backend import tf as ktf
+
+
+class Interp(layers.Layer):
+
+    def __init__(self, new_size, **kwargs):
+        self.new_size = new_size
+        super(Interp, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        super(Interp, self).build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        new_height, new_width = self.new_size
+        resized = ktf.image.resize_images(inputs, [new_height, new_width],
+                                          align_corners=True)
+        return resized
+
+    def compute_output_shape(self, input_shape):
+        return tuple([None, self.new_size[0], self.new_size[1], input_shape[3]])
+
+    def get_config(self):
+        config = super(Interp, self).get_config()
+        config['new_size'] = self.new_size
+        return config
+
+
+class AdaptivePooling2D(layers.Layer):
+
+    def __init__(self, output_size, mode='avg', **kwargs):
+        if mode not in ['avg', 'max']:
+            msg = "Mode must be either 'max' or 'avg'. Got '{0}'"
+            raise ValueError(msg.format(mode))
+        self.output_size = output_size
+        self.mode = mode
+        super(AdaptivePooling2D, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        super(AdaptivePooling2D, self).build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        return adaptive_pooling_2d(inputs, self.output_size, self.mode)
+
+    def compute_output_shape(self, input_shape):
+        return tuple([input_shape[0], self.output_size, self.output_size, input_shape[3]])
+
+    def get_config(self):
+        config = super(AdaptivePooling2D, self).get_config()
+        config['output_size'] = self.output_size
+        config['mode'] = self.mode
+        return config
diff --git a/layers_builder.py b/nnet/model.py
similarity index 69%
rename from layers_builder.py
rename to nnet/model.py
index 57129a8..d81a90b 100644
--- a/layers_builder.py
+++ b/nnet/model.py
@@ -1,54 +1,21 @@
 from __future__ import print_function
 from math import ceil
 from keras import layers
-from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
+from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D
 from keras.layers import BatchNormalization, Activation, Input, Dropout, ZeroPadding2D, Lambda
 from keras.layers.merge import Concatenate, Add
 from keras.models import Model
 from keras.optimizers import SGD
 from keras.backend import tf as ktf
+from .keras_layers import AdaptivePooling2D, Interp
 import tensorflow as tf
 
-learning_rate = 1e-3  # Layer specific learning rate
-# Weight decay not implemented
-
 
 def BN(name=""):
     return BatchNormalization(momentum=0.95, name=name, epsilon=1e-5)
 
 
-class Interp(layers.Layer):
-
-    def __init__(self, new_size, **kwargs):
-        self.new_size = new_size
-        super(Interp, self).__init__(**kwargs)
-
-    def build(self, input_shape):
-        super(Interp, self).build(input_shape)
-
-    def call(self, inputs, **kwargs):
-        new_height, new_width = self.new_size
-        resized = ktf.image.resize_images(inputs, [new_height, new_width],
-                                          align_corners=True)
-        return resized
-
-    def compute_output_shape(self, input_shape):
-        return tuple([None, self.new_size[0], self.new_size[1], input_shape[3]])
-
-    def get_config(self):
-        config = super(Interp, self).get_config()
-        config['new_size'] = self.new_size
-        return config
-
-
-# def Interp(x, shape):
-#     new_height, new_width = shape
-#     resized = ktf.image.resize_images(x, [new_height, new_width],
-#                                       align_corners=True)
-#     return resized
-
-
 def residual_conv(prev, level, pad=1, lvl=1, sub_lvl=1, modify_stride=False):
     lvl = str(lvl)
     sub_lvl = str(sub_lvl)
@@ -191,55 +158,38 @@ def ResNet(inp, layers):
     return res
 
 
-def interp_block(prev_layer, level, feature_map_shape, input_shape):
-    if input_shape == (473, 473):
-        kernel_strides_map = {1: 60,
-                              2: 30,
-                              3: 20,
-                              6: 10}
-    elif input_shape == (713, 713):
-        kernel_strides_map = {1: 90,
-                              2: 45,
-                              3: 30,
-                              6: 15}
-    else:
-        print("Pooling parameters for input shape ",
-              input_shape, " are not defined.")
-        exit(1)
-
+def interp_block(prev_layer, level, output_size):
     names = [
         "conv5_3_pool" + str(level) + "_conv",
        "conv5_3_pool" + str(level) + "_conv_bn"
     ]
-    kernel = (kernel_strides_map[level], kernel_strides_map[level])
-    strides = (kernel_strides_map[level], kernel_strides_map[level])
-    prev_layer = AveragePooling2D(kernel, strides=strides)(prev_layer)
+
+    prev_layer = AdaptivePooling2D(level, mode='avg')(prev_layer)
     prev_layer = Conv2D(512, (1, 1), strides=(1, 1), name=names[0],
                         use_bias=False)(prev_layer)
     prev_layer = BN(name=names[1])(prev_layer)
     prev_layer = Activation('relu')(prev_layer)
-    # prev_layer = Lambda(Interp, arguments={
-    #                      'shape': feature_map_shape})(prev_layer)
-    prev_layer = Interp(feature_map_shape)(prev_layer)
+
+    prev_layer = Interp(output_size)(prev_layer)
     return prev_layer
 
 
-def build_pyramid_pooling_module(res, input_shape):
+def build_pyramid_pooling_module(res):
     """Build the Pyramid Pooling Module."""
     # ---PSPNet concat layers with Interpolation
-    feature_map_size = tuple(int(ceil(input_dim / 8.0))
-                             for input_dim in input_shape)
-    print("PSP module will interpolate to a final feature map size of %s" %
-          (feature_map_size, ))
-    interp_block1 = interp_block(res, 1, feature_map_size, input_shape)
-    interp_block2 = interp_block(res, 2, feature_map_size, input_shape)
-    interp_block3 = interp_block(res, 3, feature_map_size, input_shape)
-    interp_block6 = interp_block(res, 6, feature_map_size, input_shape)
+    shape = ktf.shape(res)
+    output_size = (shape[1], shape[2])
+    interp_block1 = interp_block(res, 1, output_size)
+    interp_block2 = interp_block(res, 2, output_size)
+    interp_block3 = interp_block(res, 3, output_size)
+    interp_block6 = interp_block(res, 6, output_size)
 
     # concat all these layers. resulted
     # shape=(1,feature_map_size_x,feature_map_size_y,4096)
-    res = Concatenate()([res,
+    first = Interp(output_size)(res)
+
+    res = Concatenate()([first,
                          interp_block6,
                          interp_block3,
                          interp_block2,
@@ -252,9 +202,11 @@ def build_pspnet(nb_classes, resnet_layers, input_shape, activation='softmax'):
     print("Building a PSPNet based on ResNet %i expecting inputs of shape %s predicting %i classes" % (
         resnet_layers, input_shape, nb_classes))
 
-    inp = Input((input_shape[0], input_shape[1], 3))
+    inp = Input((None, None, 3))
+    input_shape = ktf.shape(inp)
     res = ResNet(inp, layers=resnet_layers)
-    psp = build_pyramid_pooling_module(res, input_shape)
+
+    psp = build_pyramid_pooling_module(res)
 
     x = Conv2D(512, (3, 3), strides=(1, 1), padding="same", name="conv5_4",
                use_bias=False)(psp)
@@ -263,16 +215,10 @@ def build_pspnet(nb_classes, resnet_layers, input_shape, activation='softmax'):
     x = Dropout(0.1)(x)
 
     x = Conv2D(nb_classes, (1, 1), strides=(1, 1), name="conv6")(x)
-    # x = Lambda(Interp, arguments={'shape': (
-    #     input_shape[0], input_shape[1])})(x)
-    x = Interp([input_shape[0], input_shape[1]])(x)
-    x = Activation('softmax')(x)
+
+    x = Interp([input_shape[1], input_shape[2]])(x)
+    x = Activation(activation)(x)
 
     model = Model(inputs=inp, outputs=x)
 
-    # Solver
-    sgd = SGD(lr=learning_rate, momentum=0.9, nesterov=True)
-    model.compile(optimizer=sgd,
-                  loss='categorical_crossentropy',
-                  metrics=['accuracy'])
     return model
diff --git a/nnet/tf_layers.py b/nnet/tf_layers.py
new file mode 100644
index 0000000..e7f3fbc
--- /dev/null
+++ b/nnet/tf_layers.py
@@ -0,0 +1,61 @@
+import tensorflow as tf
+
+
+def adaptive_pooling_2d(inputs, output_size: int, mode: str):
+    """
+    Performs a pooling operation that results in a fixed size:
+    output_size x output_size.
+
+    Used by the AdaptivePooling2D Keras layer. Refer to appendix A in [1].
+
+    Args:
+        inputs: A 4D Tensor (B, H, W, C)
+        output_size: The output size of the pooling operation.
+        mode: The pooling mode {max, avg}
+
+    Returns:
+        A 4D Tensor (B, output_size, output_size, C), built by pooling
+        each of the output_size * output_size bins and concatenating
+        the results.
+
+    References:
+        [1] He, Kaiming et al (2015):
+            Spatial Pyramid Pooling in Deep Convolutional Networks
+            for Visual Recognition.
+            https://arxiv.org/pdf/1406.4729.pdf.
+
+    Ported from: https://github.com/luizgh/Lasagne/commit/c01e3d922a5712ca4c54617a15a794c23746ac8c
+    """
+    inputs_shape = tf.shape(inputs)
+    batch = tf.cast(tf.gather(inputs_shape, 0), tf.int32)
+    h = tf.cast(tf.gather(inputs_shape, 1), tf.int32)
+    w = tf.cast(tf.gather(inputs_shape, 2), tf.int32)
+    channels = tf.cast(tf.gather(inputs_shape, 3), tf.int32)
+    if mode == 'max':
+        pooling_op = tf.reduce_max
+    elif mode == 'avg':
+        pooling_op = tf.reduce_mean
+    else:
+        msg = "Mode must be either 'max' or 'avg'. Got '{0}'"
+        raise ValueError(msg.format(mode))
+    result = []
+    n = output_size
+    for row in range(output_size):
+        for col in range(output_size):
+            # start_h = floor(row / n * h)
+            start_h = tf.cast(
+                tf.floor(tf.multiply(tf.divide(row, n), tf.cast(h, tf.float32))), tf.int32)
+            # end_h = ceil((row + 1) / n * h)
+            end_h = tf.cast(
+                tf.ceil(tf.multiply(tf.divide((row + 1), n), tf.cast(h, tf.float32))), tf.int32)
+            # start_w = floor(col / n * w)
+            start_w = tf.cast(
+                tf.floor(tf.multiply(tf.divide(col, n), tf.cast(w, tf.float32))), tf.int32)
+            # end_w = ceil((col + 1) / n * w)
+            end_w = tf.cast(
+                tf.ceil(tf.multiply(tf.divide((col + 1), n), tf.cast(w, tf.float32))), tf.int32)
+            pooling_region = inputs[:, start_h:end_h, start_w:end_w, :]
+            pool_result = pooling_op(
+                pooling_region, axis=(1, 2), keepdims=True)
+            result.append(pool_result)
+    return tf.reshape(tf.concat(result, axis=1), [batch, output_size, output_size, channels])
diff --git a/pspnet.py b/pspnet.py
index 33ad912..5c2f718 100755
--- a/pspnet.py
+++ b/pspnet.py
@@ -6,9 +6,10 @@
 import numpy as np
 from scipy import misc, ndimage
 from keras import backend as K
-from keras.models import model_from_json, load_model
+from keras.models import model_from_json, load_model, Model
 import tensorflow as tf
-import layers_builder as layers
+from nnet.model import build_pspnet
+from nnet.keras_layers import Interp, AdaptivePooling2D
 from python_utils import utils
 from python_utils.preprocessing import preprocess_img
 from keras.utils.generic_utils import CustomObjectScope
@@ -27,15 +28,15 @@ def __init__(self, nb_classes, resnet_layers, input_shape, weights):
         if 'pspnet' in weights:
             if os.path.isfile(json_path) and os.path.isfile(h5_path):
                 print("Keras model & weights found, loading...")
-                with CustomObjectScope({'Interp': layers.Interp}):
+                with CustomObjectScope({'Interp': Interp, 'AdaptivePooling2D': AdaptivePooling2D}):
                     with open(json_path, 'r') as file_handle:
                         self.model = model_from_json(file_handle.read())
                     self.model.load_weights(h5_path)
             else:
                 print("No Keras model & weights found, import from npy weights.")
-                self.model = layers.build_pspnet(nb_classes=nb_classes,
-                                                 resnet_layers=resnet_layers,
-                                                 input_shape=self.input_shape)
+                self.model = build_pspnet(nb_classes=nb_classes,
+                                          resnet_layers=resnet_layers,
+                                          input_shape=self.input_shape)
                 self.set_npy_weights(weights)
         else:
             print('Load pre-trained weights')

From 9b5e89d74a051e59a0479a97ba3ced51cd40c03e Mon Sep 17 00:00:00 2001
From: mrlzla
Date: Mon, 12 Mar 2018 12:51:30 +0300
Subject: [PATCH 2/2] Change interface of Interp layer in order to
 successfully save model to file
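
Interp's config previously stored new_size, which after the first patch could
be a dynamic shape tensor, and that got in the way of saving the model to a
JSON file. The layer now takes no size argument: it is called on a pair of
tensors and resizes the first to the spatial shape of the second. A minimal
sketch of the new calling convention (illustrative only, not code from this
patch; coarse_logits and input_image are placeholder tensors):

    from nnet.keras_layers import Interp

    # upsample coarse logits to the height and width of the network input
    upsampled = Interp()([coarse_logits, input_image])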
---
 nnet/keras_layers.py | 27 ++++++++++++++-----------
 nnet/model.py        | 25 +++++++++++--------------
 2 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/nnet/keras_layers.py b/nnet/keras_layers.py
index 875f5d6..880601d 100644
--- a/nnet/keras_layers.py
+++ b/nnet/keras_layers.py
@@ -1,11 +1,15 @@
 from keras import layers
-from .tf_layers import adaptive_pooling_2d
 from keras.backend import tf as ktf
+from .tf_layers import adaptive_pooling_2d
 
 
 class Interp(layers.Layer):
+    """Bilinear interpolation layer.
+    __call__ takes two inputs: the first is the tensor to resize,
+    the second is a tensor whose spatial shape is the resize target.
+    """
 
-    def __init__(self, new_size, **kwargs):
+    def __init__(self, new_size=None, **kwargs):
         self.new_size = new_size
         super(Interp, self).__init__(**kwargs)
 
@@ -13,27 +17,28 @@ def build(self, input_shape):
         super(Interp, self).build(input_shape)
 
     def call(self, inputs, **kwargs):
-        new_height, new_width = self.new_size
-        resized = ktf.image.resize_images(inputs, [new_height, new_width],
+        assert(len(inputs) == 2)
+        shape = ktf.shape(inputs[1])
+        new_height, new_width = shape[1], shape[2]
+        resized = ktf.image.resize_images(inputs[0], [new_height, new_width],
                                           align_corners=True)
         return resized
 
     def compute_output_shape(self, input_shape):
-        return tuple([None, self.new_size[0], self.new_size[1], input_shape[3]])
+        return tuple([input_shape[0][0], None, None, input_shape[0][3]])
 
     def get_config(self):
         config = super(Interp, self).get_config()
-        config['new_size'] = self.new_size
         return config
 
 
 class AdaptivePooling2D(layers.Layer):
 
-    def __init__(self, output_size, mode='avg', **kwargs):
+    def __init__(self, out_size, mode='avg', **kwargs):
         if mode not in ['avg', 'max']:
             msg = "Mode must be either 'max' or 'avg'. Got '{0}'"
             raise ValueError(msg.format(mode))
-        self.output_size = output_size
+        self.out_size = out_size
         self.mode = mode
         super(AdaptivePooling2D, self).__init__(**kwargs)
 
@@ -41,13 +46,13 @@ def build(self, input_shape):
         super(AdaptivePooling2D, self).build(input_shape)
 
     def call(self, inputs, **kwargs):
-        return adaptive_pooling_2d(inputs, self.output_size, self.mode)
+        return adaptive_pooling_2d(inputs, self.out_size, self.mode)
 
     def compute_output_shape(self, input_shape):
-        return tuple([input_shape[0], self.output_size, self.output_size, input_shape[3]])
+        return tuple([input_shape[0], self.out_size, self.out_size, input_shape[3]])
 
     def get_config(self):
         config = super(AdaptivePooling2D, self).get_config()
-        config['output_size'] = self.output_size
+        config['out_size'] = self.out_size
         config['mode'] = self.mode
         return config
diff --git a/nnet/model.py b/nnet/model.py
index d81a90b..cb1a292 100644
--- a/nnet/model.py
+++ b/nnet/model.py
@@ -1,8 +1,8 @@
 from __future__ import print_function
 from math import ceil
 from keras import layers
-from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D
-from keras.layers import BatchNormalization, Activation, Input, Dropout, ZeroPadding2D, Lambda
+from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
+from keras.layers import BatchNormalization, Activation, Input, Dropout, ZeroPadding2D
 from keras.layers.merge import Concatenate, Add
 from keras.models import Model
 from keras.optimizers import SGD
@@ -158,19 +158,19 @@ def ResNet(inp, layers):
     return res
 
 
-def interp_block(prev_layer, level, output_size):
+def interp_block(prev_block, level):
     names = [
         "conv5_3_pool" + str(level) + "_conv",
         "conv5_3_pool" + str(level) + "_conv_bn"
     ]
 
-    prev_layer = AdaptivePooling2D(level, mode='avg')(prev_layer)
+    prev_layer = AdaptivePooling2D(level, mode='avg')(prev_block)
     prev_layer = Conv2D(512, (1, 1), strides=(1, 1), name=names[0],
                         use_bias=False)(prev_layer)
     prev_layer = BN(name=names[1])(prev_layer)
     prev_layer = Activation('relu')(prev_layer)
 
-    prev_layer = Interp(output_size)(prev_layer)
+    prev_layer = Interp()([prev_layer, prev_block])
     return prev_layer
 
 
@@ -178,16 +178,14 @@ def build_pyramid_pooling_module(res):
     """Build the Pyramid Pooling Module."""
     # ---PSPNet concat layers with Interpolation
-    shape = ktf.shape(res)
-    output_size = (shape[1], shape[2])
-    interp_block1 = interp_block(res, 1, output_size)
-    interp_block2 = interp_block(res, 2, output_size)
-    interp_block3 = interp_block(res, 3, output_size)
-    interp_block6 = interp_block(res, 6, output_size)
+    interp_block1 = interp_block(res, 1)
+    interp_block2 = interp_block(res, 2)
+    interp_block3 = interp_block(res, 3)
+    interp_block6 = interp_block(res, 6)
 
     # concat all these layers. resulted
     # shape=(1,feature_map_size_x,feature_map_size_y,4096)
-    first = Interp(output_size)(res)
+    first = Interp()([res, res])  # needed because Concatenate didn't work on res directly
 
     res = Concatenate()([first,
                          interp_block6,
                          interp_block3,
@@ -203,7 +201,6 @@ def build_pspnet(nb_classes, resnet_layers, input_shape, activation='softmax'):
         resnet_layers, input_shape, nb_classes))
 
     inp = Input((None, None, 3))
-    input_shape = ktf.shape(inp)
     res = ResNet(inp, layers=resnet_layers)
 
     psp = build_pyramid_pooling_module(res)
@@ -216,7 +213,7 @@ def build_pspnet(nb_classes, resnet_layers, input_shape, activation='softmax'):
 
     x = Conv2D(nb_classes, (1, 1), strides=(1, 1), name="conv6")(x)
 
-    x = Interp([input_shape[1], input_shape[2]])(x)
+    x = Interp()([x, inp])
     x = Activation(activation)(x)
 
     model = Model(inputs=inp, outputs=x)