From 05c4261e67baa5781bef323100292c056b3afa0c Mon Sep 17 00:00:00 2001
From: Tero Keski-Valkama
Date: Tue, 4 Jul 2017 19:38:53 +0300
Subject: [PATCH] Upgraded Tensorflow version.

---
 debug_model.py        | 196 ++++++++++++++++++++++++++++++++++++++++++
 generate.py           |   4 +-
 inspect_checkpoint.py |  69 +++++++++++++++
 model.py              |   4 +-
 params.py             |   4 +-
 plot_generation.m     |   9 +-
 train.py              |   4 +-
 7 files changed, 277 insertions(+), 13 deletions(-)
 create mode 100755 debug_model.py
 create mode 100755 inspect_checkpoint.py

diff --git a/debug_model.py b/debug_model.py
new file mode 100755
index 0000000..36f79c6
--- /dev/null
+++ b/debug_model.py
@@ -0,0 +1,196 @@
+#!/usr/bin/python
+
+import matplotlib
+matplotlib.use('Agg')
+import pylab
+import math
+
+import tensorflow as tf
+import numpy as np
+
+import random
+import json
+import itertools
+import sys
+
+import ops
+
+def mu_law(x, mu):
+    ml = tf.sign(x) * tf.log(mu * tf.abs(x) + 1.0) / tf.log(mu + 1.0)
+    # Scale from [-1, 1] to integer classes in [0, mu].
+    return tf.cast((ml + 1.0) / 2.0 * mu + 0.5, tf.int32)
+
+# value shape is [width, quantization_channels]
+# filters shape is [filter_width, quantization_channels, dilation_channels]
+# In some implementations dilation_channels is 256.
+def causal_atrous_conv1d(value, filters, rate, padding):
+    # Using the 2-D height as the 1-D axis, and adding the batch dimension.
+    # Note that for filters using 'SAME' padding, padding zeros are added to the end of the input.
+    # This means that for causal convolutions, we must shift the output right:
+    # add zeros to the start and remove the future values from the end.
+
+    value_with_batch = tf.expand_dims(value, 0)
+    # Normally we would use the following, but in practice CuDNN has no implementations
+    # for these strided convolutions, so this only works on the CPU.
+    # value_2d = tf.expand_dims(value_with_batch, 2)
+    # filters_2d = tf.expand_dims(filters, 1)
+    # atrous_conv = tf.nn.atrous_conv2d(value_2d, filters_2d, rate, padding)
+    # # Squeezing out the width and the batch dimensions.
+    # atr_conv_1d = tf.squeeze(atrous_conv, [0, 2])
+    width = tf.shape(value)[0]
+    dilation_channels = tf.shape(filters)[2]
+    # filter_shape = tf.shape(filters)
+    # filter_width = filter_shape[0]
+    # filter_width_up = filter_width + (filter_width - 1) * (rate - 1)
+    # pad_width = filter_width_up - 1
+    # pad_left = pad_width // 2
+    # pad_right = pad_width - pad_left
+    # # We want to shift the result so that acausal values are removed.
+    # # Any value in the output that makes use of right padding values is acausal.
+    # # So, we remove pad_right elements from the end, and add as many zeros to the beginning.
+    # dilation_channels = tf.shape(atr_conv_1d)[1]
+    # causal = tf.pad(tf.slice(atr_conv_1d, [0, 0], [width - pad_right, dilation_channels]),
+    #                 [[pad_right, 0], [0, 0]])
+    # return causal
+
+    # Instead we use this implementation from Igor Babuschkin:
+    atr_conv_1d_with_batch = ops.causal_conv(value_with_batch, filters, rate)
+    atr_conv_1d = tf.squeeze(atr_conv_1d_with_batch, [0])
+    # atr_conv_1d shape is [width, dilation_channels]
+
+    #return atr_conv_1d
+    return tf.zeros([width, dilation_channels])  # debug stub instead of atr_conv_1d
+
+# Returns a tuple of output to the next layer and skip output.
+# The shape of x is [width, dense_channels]
+def gated_unit(x, dilation, parameters, layer_index):
+    #tf.summary.histogram('{}_x'.format(layer_index), x)
+
+    filter_width = parameters['filter_width']
+    dense_channels = parameters['dense_channels']
+    dilation_channels = parameters['dilation_channels']
+    quantization_channels = parameters['quantization_channels']
+
+    w1 = tf.Variable(tf.random_normal([filter_width, dense_channels, dilation_channels], stddev=0.05),
+        dtype=tf.float32, name='w1')
+    w2 = tf.Variable(tf.random_normal([filter_width, dense_channels, dilation_channels], stddev=0.05),
+        dtype=tf.float32, name='w2')
+    cw = tf.Variable(tf.random_normal([1, dilation_channels, dense_channels], mean=1.0, stddev=0.05),
+        dtype=tf.float32, name='cw')
+
+    #tf.summary.histogram('{}_w1'.format(layer_index), w1)
+    #tf.summary.histogram('{}_w2'.format(layer_index), w2)
+    #tf.summary.histogram('{}_cw'.format(layer_index), cw)
+
+    with tf.name_scope('causal_atrous_convolution'):
+        dilated1 = causal_atrous_conv1d(x, w1, dilation, 'SAME')
+        dilated2 = causal_atrous_conv1d(x, w2, dilation, 'SAME')
+    with tf.name_scope('gated_unit'):
+        z = tf.multiply(tf.tanh(dilated1), tf.sigmoid(dilated2))
+        # dilated1, dilated2, z shapes are [width, dilation_channels]
+        skip = tf.squeeze(tf.nn.conv1d(tf.expand_dims(z, 0), cw, 1, 'SAME'), [0])
+        #tf.summary.histogram('{}_skip'.format(layer_index), skip)
+        output = skip + x
+        #tf.summary.histogram('{}_output'.format(layer_index), output)
+        # skip and output shapes are [width, dense_channels]
+    return (output, skip)
+
+# Returns a tuple of (output, non-softmaxed-logits output).
+# The non-softmaxed output is used for the loss calculation.
+# The shape of x is [width, quantization_channels]
+# The shape of output is [width, quantization_channels]
+# dilations is an array such as [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1, 2, ..., 512]
+def layers(x, parameters):
+    dilations = parameters['dilations']
+    quantization_channels = parameters['quantization_channels']
+    dense_channels = parameters['dense_channels']
+    width = parameters['sample_length']
+
+    co_dense = tf.Variable(tf.random_normal([1, quantization_channels, dense_channels], mean=1.0, stddev=0.05),
+        dtype=tf.float32, name='dense_w')
+
+    next_input = tf.squeeze(tf.nn.conv1d(tf.expand_dims(x, 0), co_dense, 1, 'SAME'), [0])  # use_cudnn_on_gpu=False is not supported by conv1d
+    skip_connections = []
+    for (i, dilation) in enumerate(dilations):
+        with tf.name_scope('layer_{}'.format(i)):
+            print "Creating layer {}".format(i)
+            #(output, skip) = gated_unit(next_input, dilation, parameters, i)
+            output = tf.zeros([width, dense_channels])
+            skip = tf.zeros([width, dense_channels])
+            # output and skip shapes are [width, dense_channels]
+            next_input = output
+            skip_connections.append(skip)
+            sys.stdout.flush()
+    #skips_tensor = tf.nn.relu(tf.stack(skip_connections, 2))
+
+    #co1 = tf.Variable(tf.random_normal([1, 1, len(dilations), 1], mean=1.0, stddev=0.05),
+    #    dtype=tf.float32, name='co1')
+
+    #weighted_skips = tf.squeeze(tf.nn.conv2d(tf.expand_dims(skips_tensor, 0), co1, [1, 1, 1, 1], padding = 'SAME'), [0, 3])
+    weighted_skips = tf.zeros([width, dense_channels])
+
+    # weighted_skips shape is [width, dense_channels]
+    #relu1 = tf.nn.relu(weighted_skips)
+
+    #co2 = tf.Variable(tf.random_normal([1, dense_channels, 256], mean=1.0, stddev=0.05),
+    #    dtype=tf.float32, name='co2')
+
+    #raw_output = tf.squeeze(tf.nn.conv1d(tf.expand_dims(relu1, 0), co2, 1, 'SAME'), [0])
+    raw_output = tf.zeros([width, quantization_channels])
+    # raw_output shape is [width, quantization_channels]
+    #output = tf.nn.softmax(raw_output)
+    sm_outputs = []
+    for i in range(width):
+        sm_outputs.append(tf.nn.softmax(tf.slice(raw_output, [i, 0], [1, -1])))
+    output = tf.concat(sm_outputs, 0)  # was tf.pack (removed in TF 1.0); concat keeps the shape [width, quantization_channels]
+    #output = tf.zeros([width, quantization_channels])
+    return (output, raw_output)
+
+def create(parameters):
+    quantization_channels = parameters['quantization_channels']
+    sample_length = parameters['sample_length']
+    input = tf.placeholder(tf.float32, shape=(sample_length), name='input')
+    y = input
+    x = tf.pad(tf.slice(input, [0], [tf.shape(input)[0] - 1]), [[1, 0]])
+    width = tf.shape(x)[0]
+    # x is shifted right by one and padded with a zero.
+    mu_lawd = mu_law(x, float(quantization_channels - 1))
+    shifted_mu_law_x = tf.one_hot(mu_lawd, quantization_channels)
+
+    classes_y = mu_law(y, float(quantization_channels - 1))
+    (output, raw_output) = layers(shifted_mu_law_x, parameters)
+    #output = tf.zeros([width, quantization_channels])
+    #raw_output = tf.zeros([width, quantization_channels])
+    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=raw_output, labels=classes_y, name='cost')
+
+    tvars = tf.trainable_variables()
+    #gradients = tf.gradients(cost, tvars)
+    # grads, _ = tf.clip_by_global_norm(gradients, parameters['clip_gradients'])
+    optimizer = tf.train.AdamOptimizer(learning_rate = parameters['learning_rate'])
+
+    #train_op = optimizer.apply_gradients(zip(gradients, tvars))
+    train_op = x  # debug stub; the real train op above is commented out
+    tf.add_check_numerics_ops()
+
+    model = {
+        'output': output,
+        'optimizer': train_op,
+        'x': input,
+        'cost': cost
+    }
+    return model
+
+def create_generative_model(parameters):
+    quantization_channels = parameters['quantization_channels']
+    input = tf.placeholder(tf.float32, name='input')
+    mu_law_input = tf.one_hot(mu_law(input, float(quantization_channels - 1)), quantization_channels)
+
+    (full_generated_output, _) = layers(mu_law_input, parameters)
+    # The generated output is only the last predicted distribution.
+    generated_output = tf.squeeze(tf.slice(full_generated_output, [tf.shape(full_generated_output)[0] - 1, 0], [1, -1]), [0])
+
+    model = {
+        'generated_output': generated_output,
+        'x': input
+    }
+    return model
diff --git a/generate.py b/generate.py
index a9cd895..f1e980b 100755
--- a/generate.py
+++ b/generate.py
@@ -67,7 +67,7 @@
         [probabilities] = sess.run([generative_model['generated_output']],
             feed_dict = {
                 generative_model['mu_law_input']: mu_law_input
             })
-        image.append(probabilities)
+#        image.append(probabilities)
 
 def choose_value(sample):
     sample = np.asarray(sample)
@@ -87,7 +87,7 @@ def choose_value(sample):
             axis=0))[1:, :]
         output_signal = np.append(output_signal, next_val)
 
-    export_to_octave.save('image.mat', 'i', image)
+#    export_to_octave.save('image.mat', 'i', image)
     wav = np.asarray(map(int, output_signal * (2.**15)), dtype=np.int16)
     wav2 = np.asarray(map(int, signal * (2.**15)), dtype=np.int16)
     export_to_octave.save('sound.mat', 's', wav)
diff --git a/inspect_checkpoint.py b/inspect_checkpoint.py
new file mode 100755
index 0000000..8720253
--- /dev/null
+++ b/inspect_checkpoint.py
@@ -0,0 +1,69 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================

+"""A simple script for inspecting checkpoint files."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+
+import tensorflow as tf
+
+FLAGS = tf.app.flags.FLAGS
+tf.app.flags.DEFINE_string("file_name", "", "Checkpoint filename")
+tf.app.flags.DEFINE_string("tensor_name", "", "Name of the tensor to inspect")
+tf.app.flags.DEFINE_bool("all_tensors", False,
+                         "If True, print the values of all the tensors.")
+
+
+def print_tensors_in_checkpoint_file(file_name, tensor_name):
+  """Prints tensors in a checkpoint file.
+  If no `tensor_name` is provided, prints the tensor names and shapes
+  in the checkpoint file.
+  If `tensor_name` is provided, prints the content of the tensor.
+  Args:
+    file_name: Name of the checkpoint file.
+    tensor_name: Name of the tensor in the checkpoint file to print.
+  """
+  try:
+    reader = tf.train.NewCheckpointReader(file_name)
+    if FLAGS.all_tensors:
+      var_to_shape_map = reader.get_variable_to_shape_map()
+      for key in var_to_shape_map:
+        print("tensor_name: ", key)
+        print(reader.get_tensor(key))
+    elif not tensor_name:
+      print(reader.debug_string().decode("utf-8"))
+    else:
+      print("tensor_name: ", tensor_name)
+      print(reader.get_tensor(tensor_name))
+  except Exception as e:  # pylint: disable=broad-except
+    print(str(e))
+    if "corrupted compressed block contents" in str(e):
+      print("It's likely that your checkpoint file has been compressed "
+            "with SNAPPY.")
+
+
+def main(unused_argv):
+  if not FLAGS.file_name:
+    print("Usage: inspect_checkpoint --file_name=checkpoint_file_name "
+          "[--tensor_name=tensor_to_print]")
+    sys.exit(1)
+  else:
+    print_tensors_in_checkpoint_file(FLAGS.file_name, FLAGS.tensor_name)
+
+if __name__ == "__main__":
+  tf.app.run()
diff --git a/model.py b/model.py
index 6abe98f..0dafa68 100755
--- a/model.py
+++ b/model.py
@@ -107,7 +107,7 @@ def gated_unit(x, dilation, parameters, layer_index, noise):
         dilated1 = causal_atrous_conv1d(x, w1, dilation, 'SAME')
         dilated2 = causal_atrous_conv1d(x, w2, dilation, 'SAME')
     with tf.name_scope('gated_unit'):
-        z = tf.mul(tf.tanh(dilated1), tf.sigmoid(dilated2))
+        z = tf.multiply(tf.tanh(dilated1), tf.sigmoid(dilated2))
         # dilated1, dilated2, z shapes are [width, dilation_channels]
         output = conv1d(z, cw) + x
         # combined and output shapes are [width, dense_channels]
@@ -176,7 +176,7 @@ def create(parameters):
     # regularization loss with the normal loss.
reg_loss = reg_loss / 100000.0 - cost = tf.nn.sparse_softmax_cross_entropy_with_logits(raw_output, classes_y, name='cost') + cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=raw_output, labels=classes_y, name='cost') cost_plus_regularization = cost + reg_loss tvars = tf.trainable_variables() diff --git a/params.py b/params.py index fcf204e..2cd79f1 100644 --- a/params.py +++ b/params.py @@ -1,5 +1,5 @@ parameters = { - 'learning_rate': 0.04, + 'learning_rate': 0.0005, 'display_step': 10, 'input_noise': 0.005, 'input_salt_and_pepper_noise': 0.005, @@ -17,5 +17,5 @@ 'sample_length': 1023 * 4 + 1, 'training_length': 1024 * 512, 'clip_gradients': 1000.0, - 'temperature': 0.7 + 'temperature': 1.0 } diff --git a/plot_generation.m b/plot_generation.m index a9565f9..803c1e4 100644 --- a/plot_generation.m +++ b/plot_generation.m @@ -1,12 +1,11 @@ load("sound.mat") -load("image.mat") +#load("image.mat") subplot(1,1,1) size(s) seed_length = 1024 * 512; -s = s(:,seed_length:size(s)(2)); -subplot(1, 2, 1) +s = s(:,seed_length-500:size(s)(2)); plot(s) soundsc(s, 48000) -subplot(1, 2, 2) -imagesc(flipud(i')) +#subplot(1, 2, 2) +#imagesc(flipud(i')) wavwrite(s, 48000, "generated.wav") diff --git a/train.py b/train.py index 645b8af..58578a7 100755 --- a/train.py +++ b/train.py @@ -73,7 +73,7 @@ def choose_value(sample): sampled = np.random.choice(np.arange(parameters['quantization_channels']), p=sample) return operations.de_mu_law(sampled, float(parameters['quantization_channels'] - 1)) - writer = tf.train.SummaryWriter("logs", sess.graph) + writer = tf.summary.FileWriter("logs", sess.graph) iter = 1 train_error_trend = [] @@ -183,4 +183,4 @@ def choose_value(sample): # Returning the last loss value for hyper parameter search return last_loss - \ No newline at end of file +
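
Notes on the TensorFlow 1.x renames this patch applies: tf.mul became
tf.multiply, tf.pack became tf.stack, tf.train.SummaryWriter moved to
tf.summary.FileWriter, and tf.nn.sparse_softmax_cross_entropy_with_logits no
longer accepts positional logits/labels, which is why the calls above use
keyword arguments. A minimal, self-contained sketch of the new spellings,
runnable under TensorFlow 1.x (the constants are illustrative only):

    import tensorflow as tf

    a = tf.constant([1.0, 2.0])
    b = tf.constant([3.0, 4.0])
    z = tf.multiply(a, b)         # was tf.mul(a, b)
    s = tf.stack([a, b], axis=0)  # was tf.pack([a, b], 0)

    logits = tf.constant([[2.0, 0.5], [0.1, 3.0]])
    labels = tf.constant([0, 1])
    # 1.x requires the keyword arguments; positional calls raise an error.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                          labels=labels)

    writer = tf.summary.FileWriter("logs")  # was tf.train.SummaryWriter("logs")

    with tf.Session() as sess:
        print(sess.run([z, s, loss]))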
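
The mu_law function in debug_model.py compands a [-1, 1] signal and quantizes
it to integer classes in [0, mu]; generation inverts this via
operations.de_mu_law. A NumPy sketch of the pair, assuming the standard mu-law
inverse (the function names here are illustrative, not the repo's):

    import numpy as np

    def mu_law_encode(x, mu=255.0):
        # Compand [-1, 1] with the mu-law curve, then quantize to [0, mu].
        ml = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
        return ((ml + 1.0) / 2.0 * mu + 0.5).astype(np.int32)

    def mu_law_decode(classes, mu=255.0):
        # Map the classes back to [-1, 1], then invert the companding.
        ml = 2.0 * classes / mu - 1.0
        return np.sign(ml) * ((1.0 + mu) ** np.abs(ml) - 1.0) / mu

    x = np.linspace(-1.0, 1.0, 9)
    print(mu_law_encode(x))                 # integer classes in 0..255
    print(mu_law_decode(mu_law_encode(x)))  # approximately recovers x

Here mu = 255 corresponds to 256 quantization channels, mirroring the
mu = float(quantization_channels - 1) calls in debug_model.py.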
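
The commented-out block in causal_atrous_conv1d documents the padding
arithmetic that makes a 'SAME' atrous convolution causal: the effective filter
width is filter_width + (filter_width - 1) * (rate - 1), and all of the
resulting padding must land on the left (past) side so no output depends on
future samples. A single-channel NumPy sketch of the same idea (the tap
ordering is illustrative; ops.causal_conv handles the multi-channel TF case):

    import numpy as np

    def causal_dilated_conv1d(x, w, dilation):
        # x: [width], w: [filter_width]; returns [width].
        # Left-pad so that output[t] depends only on x[t], x[t - dilation], ...
        pad = (len(w) - 1) * dilation
        xp = np.concatenate([np.zeros(pad), x])
        return np.array([
            sum(w[k] * xp[pad + t - k * dilation] for k in range(len(w)))
            for t in range(len(x))
        ])

    x = np.arange(8, dtype=float)
    print(causal_dilated_conv1d(x, np.array([1.0, 1.0]), dilation=2))
    # [0, 1, 2, 4, 6, 8, 10, 12]: each output is x[t] + x[t-2].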
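
params.py raises 'temperature' from 0.7 to 1.0, and train.py's choose_value
samples the next value with np.random.choice(..., p=sample). A temperature is
typically applied to such a categorical distribution before sampling, as in
this sketch (how generate.py actually applies the parameter is not shown in
this diff, so the function below is an assumption):

    import numpy as np

    def sample_with_temperature(probs, temperature=1.0):
        # temperature < 1 sharpens the distribution, > 1 flattens it;
        # at exactly 1.0 the distribution is unchanged.
        logits = np.log(np.asarray(probs) + 1e-12) / temperature
        p = np.exp(logits - logits.max())
        p /= p.sum()
        return np.random.choice(len(p), p=p)

    probs = [0.1, 0.2, 0.7]
    print(sample_with_temperature(probs, 1.0))  # samples probs as-is
    print(sample_with_temperature(probs, 0.5))  # strongly favors index 2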