Upgraded Tensorflow version.
Tero Keski-Valkama committed Jul 4, 2017
1 parent bfc674d commit 05c4261
Showing 7 changed files with 280 additions and 13 deletions.
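
Most of the edits track TensorFlow 1.x API renames; summarizing the hunks below (not an exhaustive migration guide):

    tf.mul(a, b)                           -> tf.multiply(a, b)
    tf.train.SummaryWriter(logdir, graph)  -> tf.summary.FileWriter(logdir, graph)
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels, name=...)
        -> keyword arguments are now required:
           tf.nn.sparse_softmax_cross_entropy_with_logits(logits=..., labels=..., name=...)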
199 changes: 199 additions & 0 deletions debug_model.py
@@ -0,0 +1,199 @@
#!/usr/bin/python

import matplotlib
matplotlib.use('Agg')
import pylab
import math

import tensorflow as tf
import numpy as np

import random
import json
import itertools
import sys

import ops

def mu_law(x, mu):
    ml = tf.sign(x) * tf.log(mu * tf.abs(x) + 1.0) / tf.log(mu + 1.0)
    # Scaling from [-1, 1] to integer classes between 0 and mu.
    return tf.cast((ml + 1.0) / 2.0 * mu + 0.5, tf.int32)
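# For example (illustrative values), with mu = 255.0 the points -1.0, 0.0 and
# 1.0 map to classes 0, 128 and 255 respectively, so a [-1, 1] signal becomes
# 256 integer classes.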

# value shape is [width, quantization_channels]
# filters shape is [filter_width, quantization_channels, dilation_channels]
# In some implementations dilation_channels is 256.
def causal_atrous_conv1d(value, filters, rate, padding):
    # Using height in 2-D as the 1-D. Adding the batch dimension also.
    # Note that for filters using 'SAME' padding, padding zeros are added to the end of the input.
    # This means that for causal convolutions, we must shift the output right:
    # add zeros to the start and remove the future values from the end.

    value_with_batch = tf.expand_dims(value, 0)
    # Normally we would use this, but in practice CuDNN does not have implementations for the
    # strided convolutions, so this only works on the CPU.
    # value_2d = tf.expand_dims(value_with_batch, 2)
    # filters_2d = tf.expand_dims(filters, 1)
    # atrous_conv = tf.nn.atrous_conv2d(value_2d, filters_2d, rate, padding)
    # # Squeezing out the width and the batch dimensions.
    # atr_conv_1d = tf.squeeze(atrous_conv, [0, 2])
    width = tf.shape(value)[0]
    dilation_channels = tf.shape(filters)[2]
    # filter_shape = tf.shape(filters)
    # filter_width = filter_shape[0]
    # filter_width_up = filter_width + (filter_width - 1) * (rate - 1)
    # pad_width = filter_width_up - 1
    # pad_left = pad_width // 2
    # pad_right = pad_width - pad_left
    # # We want to shift the result so that acausal values are removed.
    # # Any value in the output that makes use of right padding values is acausal.
    # # So, we remove pad_right elements from the end, and add as many zeros to the beginning.
    # dilation_channels = tf.shape(atr_conv_1d)[1]
    # causal = tf.pad(tf.slice(atr_conv_1d, [0, 0], [width - pad_right, dilation_channels]),
    #                 [[pad_right, 0], [0, 0]])
    # return causal

    # Instead we use this implementation from Igor Babuschkin:
    atr_conv_1d_with_batch = ops.causal_conv(value_with_batch, filters, rate)
    atr_conv_1d = tf.squeeze(atr_conv_1d_with_batch, [0])
    # atr_conv_1d shape is [width, dilation_channels]

    # Debug stub: the real convolution output is disabled and replaced with
    # zeros to isolate problems elsewhere in the graph.
    # return atr_conv_1d
    return tf.zeros([width, dilation_channels])

# Returns a tuple of output to the next layer and skip output.
# The shape of x is [width, dense_channels]
def gated_unit(x, dilation, parameters, layer_index):
    # tf.histogram_summary('{}_x'.format(layer_index), x)

    filter_width = parameters['filter_width']
    dense_channels = parameters['dense_channels']
    dilation_channels = parameters['dilation_channels']
    quantization_channels = parameters['quantization_channels']

    w1 = tf.Variable(tf.random_normal([filter_width, dense_channels, dilation_channels], stddev=0.05),
                     dtype=tf.float32, name='w1')
    w2 = tf.Variable(tf.random_normal([filter_width, dense_channels, dilation_channels], stddev=0.05),
                     dtype=tf.float32, name='w2')
    cw = tf.Variable(tf.random_normal([1, dilation_channels, dense_channels], mean=1.0, stddev=0.05),
                     dtype=tf.float32, name='cw')

    # tf.histogram_summary('{}_w1'.format(layer_index), w1)
    # tf.histogram_summary('{}_w2'.format(layer_index), w2)
    # tf.histogram_summary('{}_cw'.format(layer_index), cw)

    with tf.name_scope('causal_atrous_convolution'):
        dilated1 = causal_atrous_conv1d(x, w1, dilation, 'SAME')
        dilated2 = causal_atrous_conv1d(x, w2, dilation, 'SAME')
    with tf.name_scope('gated_unit'):
        z = tf.multiply(tf.tanh(dilated1), tf.sigmoid(dilated2))
    # dilated1, dilated2, z shapes are [width, dilation_channels]
    skip = tf.squeeze(tf.nn.conv1d(tf.expand_dims(z, 0), cw, 1, 'SAME'), [0])
    # tf.histogram_summary('{}_skip'.format(layer_index), skip)
    output = skip + x
    # tf.histogram_summary('{}_output'.format(layer_index), output)
    # output and skip shapes are [width, dense_channels]
    return (output, skip)
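# For reference, the gated activation above follows the WaveNet form
#   z = tanh(W_f * x) . sigmoid(W_g * x)
# where '*' is a dilated causal convolution and '.' an elementwise product;
# the 1x1 convolution with cw then maps z back to dense_channels.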

# Returns a tuple of (output, non-softmaxed-logits output)
# The non-softmaxed output is used for the loss calculation.
# The shape of x is [width, quantization_channels]
# The shape of output is [width, quantization_channels]
# Dilations is an array of [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1, 2, ..., 512]
def layers(x, parameters):
    dilations = parameters['dilations']
    quantization_channels = parameters['quantization_channels']
    dense_channels = parameters['dense_channels']
    width = parameters['sample_length']

    co_dense = tf.Variable(tf.random_normal([1, quantization_channels, dense_channels], mean=1.0, stddev=0.05),
                           dtype=tf.float32, name='dense_w')

    # Note: use_cudnn_on_gpu=False is not supported for tf.nn.conv1d.
    next_input = tf.squeeze(tf.nn.conv1d(tf.expand_dims(x, 0), co_dense, 1, 'SAME'), [0])
    skip_connections = []
    for (i, dilation) in enumerate(dilations):
        with tf.name_scope('layer_{}'.format(i)):
            print "Creating layer {}".format(i)
            # Debug stub: the gated unit is disabled and replaced with zeros.
            # (output, skip) = gated_unit(next_input, dilation, parameters, i)
            output = tf.zeros([width, dense_channels])
            skip = tf.zeros([width, dense_channels])
            # output and skip shapes are [width, dense_channels]
            next_input = output
            skip_connections.append(skip)
            sys.stdout.flush()
    # skips_tensor = tf.nn.relu(tf.stack(skip_connections, 2))

    # co1 = tf.Variable(tf.random_normal([1, 1, len(dilations), 1], mean=1.0, stddev=0.05),
    #                   dtype=tf.float32, name='co1')

    # weighted_skips = tf.squeeze(tf.nn.conv2d(tf.expand_dims(skips_tensor, 0), co1, [1, 1, 1, 1],
    #                                          padding='SAME'), [0, 3])
    weighted_skips = tf.zeros([width, dense_channels])

    # weighted_skips shape is [width, dense_channels]
    # relu1 = tf.nn.relu(weighted_skips)

    # co2 = tf.Variable(tf.random_normal([1, dense_channels, 256], mean=1.0, stddev=0.05),
    #                   dtype=tf.float32, name='co2')

    # raw_output = tf.squeeze(tf.nn.conv1d(tf.expand_dims(relu1, 0), co2, 1, 'SAME'), [0])
    raw_output = tf.zeros([width, quantization_channels])
    # raw_output shape is [width, quantization_channels]
    # output = tf.nn.softmax(raw_output)
    sm_outputs = []
    for i in range(width):
        sm_outputs.append(tf.nn.softmax(tf.slice(raw_output, [i, 0], [1, -1])))
    # tf.concat (rather than the pre-1.0 tf.pack) keeps the documented
    # [width, quantization_channels] shape for the stacked softmax rows.
    output = tf.concat(sm_outputs, 0)
    # output = tf.zeros([width, quantization_channels])
    return (output, raw_output)
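# Note: tf.nn.softmax normalizes over the last dimension, so the per-row loop
# above should be equivalent to a single tf.nn.softmax(raw_output); the loop
# form presumably makes it easier to inspect individual timesteps while
# debugging.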

def create(parameters):
    quantization_channels = parameters['quantization_channels']
    sample_length = parameters['sample_length']
    input = tf.placeholder(tf.float32, shape=(sample_length), name='input')
    y = input
    # x is the input shifted right by one and zero-padded at the start.
    x = tf.pad(tf.slice(input, [0], [tf.shape(input)[0] - 1]), [[1, 0]])
    width = tf.shape(x)[0]
    mu_lawd = mu_law(x, float(quantization_channels - 1))
    shifted_mu_law_x = tf.one_hot(mu_lawd, quantization_channels)

    classes_y = mu_law(y, quantization_channels - 1)
    (output, raw_output) = layers(shifted_mu_law_x, parameters)
    # output = tf.zeros([width, quantization_channels])
    # raw_output = tf.zeros([width, quantization_channels])
    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=raw_output, labels=classes_y, name='cost')

    tvars = tf.trainable_variables()
    # gradients = tf.gradients(cost, tvars)
    # grads, _ = tf.clip_by_global_norm(gradients, parameters['clip_gradients'])
    optimizer = tf.train.AdamOptimizer(learning_rate=parameters['learning_rate'])

    # Debug stub: the real training op is disabled; x stands in so the rest of
    # the harness still has something to run.
    # train_op = optimizer.apply_gradients(zip(gradients, tvars))
    train_op = x
    # Note: tf.add_check_numerics_ops() returns a grouped op which only takes
    # effect if it is actually run in the session; the return value is
    # discarded here.
    tf.add_check_numerics_ops()

    model = {
        'output': output,
        'optimizer': train_op,
        'x': input,
        'cost': cost
    }
    return model
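# Note: with sparse_softmax_cross_entropy_with_logits the cost above is one
# loss value per timestep (shape [width]); a real training op would typically
# apply tf.reduce_mean before handing it to the optimizer.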

def create_generative_model(parameters):
    quantization_channels = parameters['quantization_channels']
    input = tf.placeholder(tf.float32, name='input')
    mu_law_input = tf.one_hot(mu_law(input, float(quantization_channels - 1)), quantization_channels)

    (full_generated_output, _) = layers(mu_law_input, parameters)
    # The generated output is only the last predicted distribution.
    generated_output = tf.squeeze(tf.slice(full_generated_output,
                                           [tf.shape(full_generated_output)[0] - 1, 0], [1, -1]), [0])

    model = {
        'generated_output': generated_output,
        'x': input
    }
    return model
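
The commented-out block in causal_atrous_conv1d above describes making a 'SAME' atrous convolution causal by trimming acausal samples from the end; ops.causal_conv reaches the same result by padding on the left instead. A minimal NumPy sketch of that left-padded formulation, for reference (function and variable names here are illustrative, not code from this repository):

    import numpy as np

    def causal_dilated_conv1d(x, w, rate):
        # x: [width, in_channels], w: [filter_width, in_channels, out_channels].
        # Dilated cross-correlation, matching the tf.nn.conv1d convention.
        filter_width = w.shape[0]
        pad = (filter_width - 1) * rate
        # Zero-pad on the left only, so out[t] never depends on x[t'] with t' > t.
        xp = np.pad(x, [(pad, 0), (0, 0)], mode='constant')
        out = np.zeros((x.shape[0], w.shape[2]))
        for t in range(out.shape[0]):
            for k in range(filter_width):
                out[t] += np.dot(xp[t + k * rate], w[k])
        return out

Each output sample t is then a function of x[t], x[t - rate], ..., x[t - (filter_width - 1) * rate] only.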
4 changes: 2 additions & 2 deletions generate.py
@@ -67,7 +67,7 @@
     [probabilities] = sess.run([generative_model['generated_output']], feed_dict = {
             generative_model['mu_law_input']: mu_law_input
         })
-    image.append(probabilities)
+    # image.append(probabilities)
 
 def choose_value(sample):
     sample = np.asarray(sample)
@@ -87,7 +87,7 @@ def choose_value(sample):
         axis=0))[1:, :]
 
     output_signal = np.append(output_signal, next_val)
-    export_to_octave.save('image.mat', 'i', image)
+    # export_to_octave.save('image.mat', 'i', image)
     wav = np.asarray(map(int, output_signal * (2.**15)), dtype=np.int16)
     wav2 = np.asarray(map(int, signal * (2.**15)), dtype=np.int16)
     export_to_octave.save('sound.mat', 's', wav)
69 changes: 69 additions & 0 deletions inspect_checkpoint.py
@@ -0,0 +1,69 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""A simple script for inspect checkpoint files."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string("file_name", "", "Checkpoint filename")
tf.app.flags.DEFINE_string("tensor_name", "", "Name of the tensor to inspect")
tf.app.flags.DEFINE_bool("all_tensors", False,
                         "If True, print the values of all the tensors.")


def print_tensors_in_checkpoint_file(file_name, tensor_name):
  """Prints tensors in a checkpoint file.

  If no `tensor_name` is provided, prints the tensor names and shapes
  in the checkpoint file.
  If `tensor_name` is provided, prints the content of the tensor.

  Args:
    file_name: Name of the checkpoint file.
    tensor_name: Name of the tensor in the checkpoint file to print.
  """
  try:
    reader = tf.train.NewCheckpointReader(file_name)
    if FLAGS.all_tensors:
      var_to_shape_map = reader.get_variable_to_shape_map()
      for key in var_to_shape_map:
        print("tensor_name: ", key)
        print(reader.get_tensor(key))
    elif not tensor_name:
      print(reader.debug_string().decode("utf-8"))
    else:
      print("tensor_name: ", tensor_name)
      print(reader.get_tensor(tensor_name))
  except Exception as e:  # pylint: disable=broad-except
    print(str(e))
    if "corrupted compressed block contents" in str(e):
      print("It's likely that your checkpoint file has been compressed "
            "with SNAPPY.")


def main(unused_argv):
  if not FLAGS.file_name:
    print("Usage: inspect_checkpoint --file_name=checkpoint_file_name "
          "[--tensor_name=tensor_to_print]")
    sys.exit(1)
  else:
    print_tensors_in_checkpoint_file(FLAGS.file_name, FLAGS.tensor_name)


if __name__ == "__main__":
  tf.app.run()
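
For reference, a typical invocation (the checkpoint and tensor names below are only placeholders):

    python inspect_checkpoint.py --file_name=model.ckpt --tensor_name=w1

Omitting --tensor_name prints a summary of all tensor names and shapes in the checkpoint, and --all_tensors=True prints their values as well.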
4 changes: 2 additions & 2 deletions model.py
@@ -107,7 +107,7 @@ def gated_unit(x, dilation, parameters, layer_index, noise):
         dilated1 = causal_atrous_conv1d(x, w1, dilation, 'SAME')
         dilated2 = causal_atrous_conv1d(x, w2, dilation, 'SAME')
     with tf.name_scope('gated_unit'):
-        z = tf.mul(tf.tanh(dilated1), tf.sigmoid(dilated2))
+        z = tf.multiply(tf.tanh(dilated1), tf.sigmoid(dilated2))
         # dilated1, dilated2, z shapes are [width, dilation_channels]
         output = conv1d(z, cw) + x
         # combined and output shapes are [width, dense_channels]
@@ -176,7 +176,7 @@ def create(parameters):
     # regularization loss with the normal loss.
     reg_loss = reg_loss / 100000.0
 
-    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(raw_output, classes_y, name='cost')
+    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=raw_output, labels=classes_y, name='cost')
     cost_plus_regularization = cost + reg_loss
 
     tvars = tf.trainable_variables()
4 changes: 2 additions & 2 deletions params.py
@@ -1,5 +1,5 @@
 parameters = {
-    'learning_rate': 0.04,
+    'learning_rate': 0.0005,
     'display_step': 10,
     'input_noise': 0.005,
     'input_salt_and_pepper_noise': 0.005,
@@ -17,5 +17,5 @@
     'sample_length': 1023 * 4 + 1,
     'training_length': 1024 * 512,
     'clip_gradients': 1000.0,
-    'temperature': 0.7
+    'temperature': 1.0
 }
9 changes: 4 additions & 5 deletions plot_generation.m
@@ -1,12 +1,11 @@
 load("sound.mat")
-load("image.mat")
+#load("image.mat")
 subplot(1,1,1)
 size(s)
 seed_length = 1024 * 512;
-s = s(:,seed_length:size(s)(2));
-subplot(1, 2, 1)
+s = s(:,seed_length-500:size(s)(2));
 plot(s)
 soundsc(s, 48000)
-subplot(1, 2, 2)
-imagesc(flipud(i'))
+#subplot(1, 2, 2)
+#imagesc(flipud(i'))
 wavwrite(s, 48000, "generated.wav")
4 changes: 2 additions & 2 deletions train.py
@@ -73,7 +73,7 @@ def choose_value(sample):
     sampled = np.random.choice(np.arange(parameters['quantization_channels']), p=sample)
     return operations.de_mu_law(sampled, float(parameters['quantization_channels'] - 1))
 
-    writer = tf.train.SummaryWriter("logs", sess.graph)
+    writer = tf.summary.FileWriter("logs", sess.graph)
 
     iter = 1
     train_error_trend = []
@@ -183,4 +183,4 @@ def choose_value(sample):
 
     # Returning the last loss value for hyper parameter search
     return last_loss
-
+