Skip to content

Commit

Permalink
Merge pull request #75 from ponder-lab/test_autoencoder
Browse files Browse the repository at this point in the history
Add autoencoder test.
  • Loading branch information
khatchad authored Jan 23, 2024
2 parents e50f711 + 39e6cb0 commit 76fef4b
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,10 @@ public void testTf2()
3); // NOTE: Change to 2 tensor parameters and 5 tensor variables once
// https://github.com/wala/ML/issues/127 is fixed. Values 2 and 3 will correspond to the
// tensor parameters.
testTf2("autoencoder.py", "encoder", 1, 10, 2);
testTf2("autoencoder.py", "mean_square", 1, 1, 3);
testTf2("autoencoder.py", "run_optimization", 1, 1, 2);
testTf2("autoencoder.py", "decoder", 1, 8, 2);
}

private void testTf2(
Expand Down
188 changes: 188 additions & 0 deletions com.ibm.wala.cast.python.test/data/autoencoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
# From https://github.com/aymericdamien/TensorFlow-Examples/blob/6dcbe14649163814e72a22a999f20c5e247ce988/tensorflow_v2/notebooks/3_NeuralNetworks/autoencoder.ipynb.

# %%
# """
# # Auto-Encoder Example

# Build a 2-layer auto-encoder with TensorFlow v2 to compress images to a lower-dimensional latent space and then reconstruct them.

# - Author: Aymeric Damien
# - Project: https://github.com/aymericdamien/TensorFlow-Examples/
# """

# %%
# """
# ## Auto-Encoder Overview

# <img src="http://kvfrans.com/content/images/2016/08/autoenc.jpg" alt="ae" style="width: 800px;"/>

# References:
# - [Gradient-based learning applied to document recognition](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf). Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. Proceedings of the IEEE, 86(11):2278-2324, November 1998.

# ## MNIST Dataset Overview

# This example uses the MNIST handwritten digits dataset. The dataset contains 60,000 examples for training and 10,000 examples for testing. The digits have been size-normalized and centered in a fixed-size image (28x28 pixels) with values from 0 to 255.

# In this example, each image will be converted to float32, normalized to [0, 1] and flattened to a 1-D array of 784 features (28*28).

# ![MNIST Dataset](http://neuralnetworksanddeeplearning.com/images/mnist_100_digits.png)

# More info: http://yann.lecun.com/exdb/mnist/
# """

# %%
from __future__ import absolute_import, division, print_function

import tensorflow as tf
# Report the installed TensorFlow version, then stop with an AssertionError
# unless it is exactly "2.9.3".
# NOTE(review): `assert` statements are removed when Python runs with -O, so
# this pin is not enforced in that mode — confirm that is acceptable.
print("TensorFlow version:", tf.__version__)
assert(tf.__version__ == "2.9.3")
import numpy as np

# %%
# MNIST Dataset parameters.
num_features = 784 # data features (img shape: 28*28).

# Training parameters.
# learning_rate is handed to the Adam optimizer created further down.
learning_rate = 0.01
# The training loop takes `training_steps + 1` batches from the dataset.
# NOTE(review): presumably kept at 1 so the script finishes quickly — confirm.
training_steps = 1
# Number of examples per batch for both the training and the test pipeline.
batch_size = 256
# The loss is printed whenever `step % display_step == 0`.
display_step = 1000

# Network Parameters
num_hidden_1 = 128 # 1st layer num features.
num_hidden_2 = 64 # 2nd layer num features (the latent dim).

# %%
# Prepare MNIST data.
# NOTE(review): mnist.load_data() presumably downloads the dataset on first
# use, which would require network access — confirm for offline runs.
from tensorflow.keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Convert to float32.
x_train, x_test = x_train.astype(np.float32), x_test.astype(np.float32)
# Flatten images to 1-D vector of 784 features (28*28).
x_train, x_test = x_train.reshape([-1, num_features]), x_test.reshape([-1, num_features])
# Normalize image values from [0, 255] to [0, 1].
x_train, x_test = x_train / 255., x_test / 255.

# %%
# Use tf.data API to shuffle and batch data.
# The training pipeline repeats the data, shuffles with a buffer of 10000
# elements, groups elements into batches of `batch_size`, and prefetches one
# batch. Labels are carried along in each element as the second component.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat().shuffle(10000).batch(batch_size).prefetch(1)

# The test pipeline is built the same way but without the shuffle step.
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_data = test_data.repeat().batch(batch_size).prefetch(1)

# %%
# Store each layer's weights and biases as tf.Variable objects, keyed by
# layer name. These two dictionaries are read by encoder() and decoder() and
# their values are the variables updated in run_optimization().

# A random value generator to initialize weights.
random_normal = tf.initializers.RandomNormal()

# Weight matrices. Shapes chain as
# num_features -> num_hidden_1 -> num_hidden_2 -> num_hidden_1 -> num_features.
weights = {
    'encoder_h1': tf.Variable(random_normal([num_features, num_hidden_1])),
    'encoder_h2': tf.Variable(random_normal([num_hidden_1, num_hidden_2])),
    'decoder_h1': tf.Variable(random_normal([num_hidden_2, num_hidden_1])),
    'decoder_h2': tf.Variable(random_normal([num_hidden_1, num_features])),
}
# Bias vectors, one per layer, sized to that layer's output width.
biases = {
    'encoder_b1': tf.Variable(random_normal([num_hidden_1])),
    'encoder_b2': tf.Variable(random_normal([num_hidden_2])),
    'decoder_b1': tf.Variable(random_normal([num_hidden_1])),
    'decoder_b2': tf.Variable(random_normal([num_features])),
}


# %%
# Building the encoder.
def encoder(x):
    # Pass `x` through two layers and return the output of the second one.
    # Each layer computes sigmoid(matmul(input, W) + b), with W and b read
    # from the module-level `weights` and `biases` dictionaries.
    # NOTE(review): `x` is presumably a batch with `num_features` columns so
    # that it is compatible with weights['encoder_h1'] — confirm with callers.
    # NOTE(review): the accompanying Java test (testTf2) pins analysis
    # expectations for this function; restructuring the body may require
    # updating them — confirm before refactoring.
    # Encoder Hidden layer with sigmoid activation.
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
                                   biases['encoder_b1']))
    # Encoder Hidden layer with sigmoid activation.
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
                                   biases['encoder_b2']))
    return layer_2


# Building the decoder.
def decoder(x):
    # Pass `x` through two layers and return the output of the second one.
    # Each layer computes sigmoid(matmul(input, W) + b), with W and b read
    # from the module-level `weights` and `biases` dictionaries.
    # NOTE(review): `x` is presumably the output of encoder(), i.e. a batch
    # with `num_hidden_2` columns matching weights['decoder_h1'] — confirm.
    # NOTE(review): the accompanying Java test (testTf2) pins analysis
    # expectations for this function; restructuring the body may require
    # updating them — confirm before refactoring.
    # Decoder Hidden layer with sigmoid activation.
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
                                   biases['decoder_b1']))
    # Decoder Hidden layer with sigmoid activation.
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))
    return layer_2


# %%
# Mean square loss between original images and reconstructed ones.
def mean_square(reconstructed, original):
    # Return tf.reduce_mean of the element-wise squared difference
    # (original - reconstructed) ** 2. No axis is passed to reduce_mean.
    # NOTE(review): the accompanying Java test (testTf2) pins analysis
    # expectations for this function — confirm before refactoring.
    return tf.reduce_mean(tf.pow(original - reconstructed, 2))


# Adam optimizer, configured with the module-level `learning_rate`.
# run_optimization() uses this single instance to apply gradient updates.
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)


# %%
# Optimization process.
def run_optimization(x):
    # Perform one optimization step on batch `x` and return the loss computed
    # for that batch (before the update is applied).
    # Side effect: the variables stored in `weights` and `biases` are updated
    # through the module-level `optimizer`.
    # NOTE(review): the accompanying Java test (testTf2) pins analysis
    # expectations for this function — confirm before refactoring.
    # Wrap computation inside a GradientTape for automatic differentiation.
    with tf.GradientTape() as g:
        # The input doubles as the reconstruction target.
        reconstructed_image = decoder(encoder(x))
        loss = mean_square(reconstructed_image, x)

    # Variables to update, i.e. trainable variables.
    trainable_variables = list(weights.values()) + list(biases.values())

    # Compute gradients.
    gradients = g.gradient(loss, trainable_variables)

    # Update W and b following gradients.
    # apply_gradients receives (gradient, variable) pairs, hence the zip.
    optimizer.apply_gradients(zip(gradients, trainable_variables))

    return loss


# %%
# Run training for the given number of steps.
# `take(training_steps + 1)` yields training_steps + 1 batches, so `step` runs
# from 0 through training_steps inclusive. The label component of each
# element is discarded (bound to `_`).
for step, (batch_x, _) in enumerate(train_data.take(training_steps + 1)):

    # Run the optimization.
    loss = run_optimization(batch_x)

    # Report the loss on every `display_step`-th step (step 0 included).
    if step % display_step == 0:
        print("step: %i, loss: %f" % (step, loss))

# %%
# Testing and Visualization.
import matplotlib.pyplot as plt

# %%
# Encode and decode images from test set and visualize their reconstruction.
# Two (28 * n) x (28 * n) canvases are filled as n x n grids of 28x28 tiles:
# row i holds the first n images of the i-th test batch, column j selects the
# image within that batch.
n = 4
# np.empty leaves the arrays uninitialized; every tile is overwritten below.
canvas_orig = np.empty((28 * n, 28 * n))
canvas_recon = np.empty((28 * n, 28 * n))
for i, (batch_x, _) in enumerate(test_data.take(n)):
    # Encode and decode the digit image.
    reconstructed_images = decoder(encoder(batch_x))
    # Display original images.
    for j in range(n):
        # Draw the generated digits.
        img = batch_x[j].numpy().reshape([28, 28])
        canvas_orig[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = img
    # Display reconstructed images.
    for j in range(n):
        # Draw the generated digits.
        reconstr_img = reconstructed_images[j].numpy().reshape([28, 28])
        canvas_recon[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = reconstr_img

# print("Original Images")
# plt.figure(figsize=(n, n))
# plt.imshow(canvas_orig, origin="upper", cmap="gray")
# plt.show()
#
# print("Reconstructed Images")
# plt.figure(figsize=(n, n))
# plt.imshow(canvas_recon, origin="upper", cmap="gray")
# plt.show()

0 comments on commit 76fef4b

Please sign in to comment.