art.py

"""
Implementation of A Neural Algorithm of Artistic Style

authors: Jesse Mu, Andrew Francl
"""

import matplotlib
import numpy as np

import caffe

# Numerical computation
from scipy import optimize
# np.dot is too slow. Use blas.sgemm instead
from scipy.linalg import blas

# IO
import skimage
from skimage.io import imsave
from skimage import transform

# Util
from itertools import izip_longest
from datetime import datetime
import os
import glob

# Make matplotlib not use X11
matplotlib.use('Agg')
# Set Caffe gpu mode
caffe.set_mode_gpu()

# Constants
VGG_MODEL = './models/VGG_ILSVRC_19_layers.caffemodel'
VGG_PROTOTXT = './models/VGG_ILSVRC_19_layers_deploy.prototxt'

MEAN_PIXEL = np.array([104.00698793, 116.66876762, 122.67891434])

SC_RATIO = 1e4

# NOTE: We use the blob name instead of the layer name according to the model
# prototxt. Layers are specified in the paper.
CONTENT_LAYERS = ['conv4_2']
STYLE_LAYERS = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
# TODO: Make these weights configurable
CONTENT_WEIGHTS = [1. / len(CONTENT_LAYERS)] * len(CONTENT_LAYERS)
STYLE_WEIGHTS = [1. / len(STYLE_LAYERS)] * len(STYLE_LAYERS)


class Art(object):
    """
    Your one-stop shop for all things neural style
    """
    def __init__(self, net, args):
        self.net = net

        self.transformer = self.create_transformer()

        # These values will be initialized when
        # network is setup (set_style_targets, set_content_targets)
        self.style_targets = None
        self.content_target = None

        # Get reversed list of layer pairs for backprop.
        # Placed in init to save running time in optimization fn
        # Blobs is an ordereddict, so keys are in order
        reversed_layers = []
        for layer in self.net.blobs.keys():
            if layer in STYLE_LAYERS or layer in CONTENT_LAYERS:
                reversed_layers.append(layer)

        reversed_layers.reverse()

        self.reversed_pairs = list(izip_longest(reversed_layers,
                                                reversed_layers[1:]))

        self.max_width = args.width

        self.init = args.init

        self.print_rate = args.print_rate

        self.style_scale = args.style_scale

        # TODO: Get rid of this when you've kept things separate.
        self.args = args

        # Set counter for printing progress in graddesc
        self.iter = 0

    def create_transformer(self):
        """
        Create the preprocessor and deprocessor using the default settings for
        the VGG-19 network.
        """
        # Give transformer necessary imput shape. Should be specified from
        # argparse arguments when creating the net
        transformer = caffe.io.Transformer(
            {'data': self.net.blobs['data'].data.shape}
        )
        # Order of the channels in the input data (not sure why necessary)
        transformer.set_transpose('data', (2, 0, 1))
        # Use BGR rather than RGB
        transformer.set_channel_swap('data', (2, 1, 0))
        # Subtract mean pixel
        transformer.set_mean('data', MEAN_PIXEL)
        # Use 8bit image values
        transformer.set_raw_scale('data', 255)

        return transformer

    def resize_image(self, img, scale=1.0):
        """
        Resize image to self.max_width, with varying height.
        """
        assert img.shape[2] == 3

        oldwidth = float(img.shape[1])
        oldheight = float(img.shape[0])
        newheight = int(((self.max_width / oldwidth) * oldheight) * scale)
        newwidth = int(self.max_width * scale)
        return transform.resize(img, (newheight, newwidth, 3))

    def resize_caffes(self, img):
        """
        Resize the caffe net and transformer input blobs to accept the scaled
        image.
        """
        new_size = (1, img.shape[2], img.shape[0], img.shape[1])
        self.net.blobs['data'].reshape(*new_size)  # Unpack for mult args
        self.transformer.inputs['data'] = new_size

    def set_style_targets(self, imgs, weights):
        """
        Params
        ======
        imgs : List<str>
            Filename of the image to load in.
        """
        target_sl_list = []
        for sl, _ in zip(STYLE_LAYERS, STYLE_WEIGHTS):

            sl_dim0 = self.net.blobs[sl].data[0].shape[0]
            target_sl = np.zeros((sl_dim0, sl_dim0))

            for img, imgweight in zip(imgs, weights):
                # Preprocess image, load into net
                stylei = caffe.io.load_image(img)
                # Resize image, set net and transformer shapes accordingly
                scaled = self.resize_image(stylei, self.style_scale)
                self.resize_caffes(scaled)

                stylei_pp = self.transformer.preprocess('data', scaled)
                self.net.blobs['data'].data[...] = stylei_pp

                self.net.forward()

                layer = self.net.blobs[sl].data[0].copy()  # Get one batch?
                # Expand style layer to 2d array
                layer = np.reshape(
                    layer,
                    (layer.shape[0], layer.shape[1] * layer.shape[2])
                )

                gram = self._gram(layer)

                target_sl += gram * imgweight

                target_sl_list.append(gram)

        self.style_targets = target_sl_list

    def set_content_target(self, img):
        """
        Create content representation of image and set as the content target.
        """
        # XXX: Assume only one content layer
        cl = CONTENT_LAYERS[0]
        contenti = caffe.io.load_image(img)
        # Resize image, set net and transformer shapes accordingly
        scaled = self.resize_image(contenti)
        self.resize_caffes(scaled)

        contenti_pp = self.transformer.preprocess('data', scaled)
        self.net.blobs['data'].data[...] = contenti_pp
        self.net.forward()

        self.content_target = self.net.blobs[cl].data[0].copy()
        # Get contenti_pp (after transformer)
        self.content_target = (
            np.reshape(
                self.content_target,
                (self.content_target.shape[0],
                 self.content_target.shape[1] * self.content_target.shape[2]))
        )

    def random_image(self):
        """
        Compute a random multicolor noise image.

        We assume that the user has called set_content_target
        because we obtain the content representation from the
        net input blob.
        """
        content_shape = self.net.blobs['data'].data.shape[1:]
        randi = (np.random.rand(*content_shape) * 255)
        return (randi.transpose() - MEAN_PIXEL).transpose()

    def _gram(self, layer):
        """
        Compute gram matrix; just the dot product of the layer and its
        transform
        """
        gram = blas.sgemm(1.0, layer, layer.T)
        return gram

    def _mse(self, A, B):
        """Mean squared error."""
        return ((A - B) ** 2).mean()

    def style_lag(self, noisies, grams, i, compute_grad=False):
        """
        Compute style losses and gradients for all gram matrices

        This is compressed into one function to save intermediate computations.

        Is assumed that gram matrices and self.style_targets correspond to
        identical layers.
        """
        # Get everything.
        style_noisy = noisies[i]
        style_gram = grams[i]
        style_target = self.style_targets[i]
        weight = STYLE_WEIGHTS[i]

        diff = (style_gram - style_target)
        size_c = (1. / ((style_noisy.shape[0] ** 2) *
                  (style_noisy.shape[1] ** 2)))
        loss = (size_c / 4) * (diff**2).sum() * weight

        if compute_grad:
            gradient = (size_c * blas.sgemm(1.0, diff, style_noisy) *
                        (style_noisy > 0) * weight)
            return loss, gradient

        return loss, None

    def content_lag(self, content_noisy, compute_grad=False):
        """
        Compute content loss and gradient.

        This is compressed into one function to save intermediate computations.
        """
        diff = (content_noisy - self.content_target)
        loss = .5 * (diff ** 2).sum()
        if compute_grad:
            gradient = diff * (content_noisy > 0)
            return loss, gradient
        return loss, None

    def loss_and_gradient(self, x):
        debug_print("Running loss and gradient")
        x_reshaped = np.reshape(x, self.net.blobs['data'].data.shape[1:])

        # Run the net on the candidate
        self.net.blobs['data'].data[...] = x_reshaped.copy()
        self.net.forward()

        content_noisy = self.net.blobs[CONTENT_LAYERS[0]].data[0].copy()
        content_noisy = np.reshape(
            content_noisy,
            (content_noisy.shape[0],
             content_noisy.shape[1] * content_noisy.shape[2])
        )

        # COMPUTE LOSSES
        # For the first iteration, we don't care about the gradients.

        # Compute content losses.
        content_loss, _ = self.content_lag(content_noisy)
        loss = content_loss

        # Collect style layers and gram matrices
        style_noisies = map(
            lambda layer: self.net.blobs[layer].data[0].copy(),
            STYLE_LAYERS
        )
        style_reshaped = map(
            lambda n: np.reshape(n, (n.shape[0], n.shape[1] * n.shape[2])),
            style_noisies
        )
        style_grams = [self._gram(m) for m in style_reshaped]

        # Compute style losses and weight by their ratio
        total_style_loss = 0
        for i in xrange(len(STYLE_WEIGHTS)):
            total_style_loss += self.style_lag(
                style_reshaped, style_grams, i, compute_grad=False
            )[0]

        loss += total_style_loss * SC_RATIO

        # Compute backprop layer by layer to obtain gradients.
        # self.net.blobs is an ordered dict, so reversed makes sense
        # Initialize net to empty
        self.net.blobs[self.reversed_pairs[-1][0]].diff[:] = 0
        for curr, prev in self.reversed_pairs:
            # Alias this for sanity
            curr_grad = self.net.blobs[curr].diff[0]

            try:
                style_index = STYLE_LAYERS.index(curr)
            except ValueError:
                # Nope, not in style layers
                style_index = -1
            if style_index > -1:
                gradient = self.style_lag(
                    style_reshaped, style_grams, style_index, compute_grad=True
                )[1]
                curr_grad += np.reshape(gradient, curr_grad.shape) * SC_RATIO
            else:
                try:
                    content_index = CONTENT_LAYERS.index(curr)
                except ValueError:
                    # Not in style layers
                    content_index = -1
                if content_index > -1:
                    # We assume weight is 1 since we're not changing this model
                    gradient = self.content_lag(
                        content_noisy, compute_grad=True
                    )[1]
                    gradient = np.reshape(gradient, curr_grad.shape)
                    curr_grad += gradient

            # Compute the gradient
            self.net.backward(start=curr, end=prev)

        final_grad = self.net.blobs['data'].diff[0]

        # Flatten for optimization
        return loss, final_grad.flatten().astype(np.float64)

    def print_prog(self, x):
        """
        Save and print progress every self.print_rate iterations.
        """
        if (self.iter % self.print_rate) == 0:
            debug_print("gdesc iteration {}".format(str(self.iter)))
            new_img = self.transformer.deprocess(
                'data',
                x.reshape(self.net.blobs['data'].data.shape)
            )
            imsave(
                '{}/iter-{}.jpg'.format(self.dirname, self.iter),
                skimage.img_as_ubyte(new_img)
            )
            imsave(
                '{}/final.jpg'.format(self.dirname, self.iter),
                skimage.img_as_ubyte(new_img)
            )
        self.iter += 1

    def go(self, maxiter=512):
        """
        This is where the magic happens.

        Return the image resulting from gradient descent for maxiter
        iterations
        """
        # Init random noise image
        debug_print("Running go")
        if args.init == 'rand':
            img = self.random_image()
        else:
            default = caffe.io.load_image(self.args.content_image)
            scaled = self.resize_image(default)
            self.resize_caffes(scaled)
            img = self.transformer.preprocess('data', scaled)

        # Compute bounds for gradient descent, borrowed from
        # fzliu/style-transfer
        data_min = -self.transformer.mean["data"][:, 0, 0]
        data_max = data_min + self.transformer.raw_scale["data"]
        data_bounds = [(data_min[0], data_max[0])] * (img.size / 3) + \
                      [(data_min[1], data_max[1])] * (img.size / 3) + \
                      [(data_min[2], data_max[2])] * (img.size / 3)

        debug_print("Starting grad descent")

        x, f, d = optimize.fmin_l_bfgs_b(
            self.loss_and_gradient,
            img.flatten(),
            bounds=data_bounds,
            fprime=None,  # We'll use loss_and_gradient
            maxiter=maxiter,
            callback=self.print_prog,
        )

        x = np.reshape(x, self.net.blobs['data'].data[0].shape)

        return self.transformer.deprocess('data', x)


def debug_print(msg, verbose=True):
    """
    Print msg only if verbose flag is True.
    """
    if verbose:
        print "{}: {}".format(datetime.now(), msg)


def main(args):
    """
    The main algorithm implementation function.
    """
    vgg = caffe.Net(
        VGG_PROTOTXT, VGG_MODEL, caffe.TEST,
    )

    style = Art(vgg, args)

    # Collect art from the wikiart folder
    if args.artist:
        try:
            args.style_images = glob.glob(
                './wikiart/{}/*'.format(args.artist)
            )
        except Exception as e:
            print "Couldn't get images for artist {}, check dir!".format(
                args.artist
            )
            raise e

    num_simages = len(args.style_images)

    if num_simages > 2:
        sample = args.style_images[:2]
    else:
        sample = args.style_images

    # Make the directory for this run
    raw_dirname = './img/{}__{}-w{}-{}'.format(
        args.content_image.split('.')[0],
        '_'.join(os.path.basename(s).split('.')[0] for s in sample),
        str(args.width),
        str(args.numiter)
    )

    # Get a unique dirname if the directory already exists
    dirname = raw_dirname
    unique = 1
    while os.path.exists(dirname):
        dirname = raw_dirname + '-' + str(unique)
        unique += 1

    debug_print("Starting {}".format(dirname))

    os.mkdir(dirname)

    # TODO this is bad, but whatever. save this dirname as an attr
    style.dirname = dirname

    # NOTE: For now, we assume style weights are equal
    style_image_weights = [1. / num_simages] * num_simages

    # Get style and content targets
    debug_print("Setting up content targets")
    style.set_content_target(args.content_image)
    debug_print("Setting up style targets")
    style.set_style_targets(args.style_images, style_image_weights)
    debug_print("Done initialization")

    # Get the candidate image
    debug_print("Running gradient descent...")
    new_img = style.go(maxiter=args.numiter)

    debug_print("Done gradient descent, saving image...")

    imsave(
        '{}/final.jpg'.format(dirname),
        skimage.img_as_ubyte(new_img)
    )

    debug_print("Done: Saved {}".format(dirname))

if __name__ == '__main__':
    from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
    import sys

    parser = ArgumentParser(
        description="Neural art in Python.",
        formatter_class=ArgumentDefaultsHelpFormatter
    )

    parser.add_argument('content_image', help="Content image")
    parser.add_argument('style_images', nargs='*',
                        help="Style image(s)")
    parser.add_argument('-A', '--artist', type=str, default=None,
                        help=("Artist to imitate, Script will randomly "
                              "choose an artwork from this artist. Artist's "
                              "work must be saved in wikiart/artist_name!"))
    parser.add_argument('-n', '--numiter', type=int, default=512,
                        help="Number of iterations")
    parser.add_argument('-w', '--width', type=int, default=512,
                        help="Max image width")
    parser.add_argument('-i', '--init', choices=['rand', 'content'],
                        default='content',
                        help=("Initialize image from noise (rand) or original "
                              "image"))
    parser.add_argument('-p', '--print_rate', type=int, default=10,
                        help="How often to save intermediate images")
    parser.add_argument('-s', '--style_scale', type=float, default=1.0,
                        help=("Resize style image - changes resolution of "
                              "features"))
    # parser.add_argument('-c' '--color-transfer', action='store_true',
    #                   # help=("Apply color transfer algorithm to attempt to "
    #                         # "change style image to match color of the "
    #                         # "content image."))
    # TODO: Output location options?
    args = parser.parse_args()

    if args.artist and args.style_images:
        sys.exit("art.py: can't use both individual style "
                 "images and artist flag")
    if not args.artist and not args.style_images:
        sys.exit("art.py: need to specify either an artist or "
                 "style images")

    main(args)