In this project, you'll use generative adversarial networks to generate new images of faces.
%%html
<style>
@import url('https://fonts.googleapis.com/css?family=Orbitron|Roboto');
body {background-color: lavender;}
a {color: #8A2BE2; font-family: Roboto;}
h1, h2 {color: #9370DB; font-family: Orbitron; text-shadow: 4px 4px 4px #aaa;}
h3, h4 {color: #663399; font-family: Roboto; text-shadow: 4px 4px 4px #aaa;}
span {text-shadow: 4px 4px 4px #ccc;}
div.output_prompt, div.output_area pre {color: slategray;}
div.input_prompt, div.output_subarea {color: #8A2BE2;}
div.output_stderr pre {background-color: lavender;}
div.output_stderr {background-color: slategrey;}
</style>
<script>
// Toggle state: true means the next call hides code, false means it shows it again.
code_show = true;
// Hide or show notebook input cells, flipping the toggle state on every call.
function code_display() {
if (code_show) {
// Hide the first input cell and every input cell whose HTML contains the marker string.
$('div.input').each(function(id) {
if (id == 0 || $(this).html().indexOf('hide_code') > -1) {$(this).hide();}
});
// Make the output prompts fully transparent while code is hidden.
$('div.output_prompt').css('opacity', 0);
} else {
// Show every input cell again and restore the output prompts.
$('div.input').each(function(id) {$(this).show();});
$('div.output_prompt').css('opacity', 1);
};
code_show = !code_show;
}
// Run once when the document is ready, so code starts out hidden.
$(document).ready(code_display);
</script>
<form action="javascript: code_display()">
<input style="color: #8A2BE2; background: lavender; text-shadow: 4px 4px 4px #aaa;" \
type="submit" value="Click to display or hide code cells">
</form>
hide_code=''
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import math
import os
import hashlib
from urllib.request import urlretrieve
import zipfile
import gzip
import shutil
import numpy as np
from PIL import Image
from tqdm import tqdm
from copy import deepcopy
from unittest import mock
from distutils.version import LooseVersion
import warnings
import tensorflow as tf
from glob import glob
from matplotlib import pyplot
from matplotlib import cm
hide_code
# https://github.com/udacity/deep-learning/blob/master/face_generation/helper.py
def _read32(bytestream):
"""Read 32-bit integer from bytesteam
:param bytestream: A bytestream
:return: 32-bit integer"""
dt = np.dtype(np.uint32).newbyteorder('>')
return np.frombuffer(bytestream.read(4), dtype=dt)[0]
def _unzip(save_path, _, database_name, data_path):
"""Unzip wrapper with the same interface as _ungzip
:param save_path: The path of the gzip files
:param database_name: Name of database
:param data_path: Path to extract to
:param _: HACK - Used to have to same interface as _ungzip"""
print('Extracting {}...'.format(database_name))
with zipfile.ZipFile(save_path) as zf:
zf.extractall(data_path)
def _ungzip(save_path, extract_path, database_name, _):
    """Decode a gzipped image file and write each image to extract_path as a JPEG.

    The file must start with the magic number 2051, followed by the image
    count, row count and column count (32-bit integers read with
    ``_read32``) and then one byte per pixel.

    :param save_path: Path of the gzip file
    :param extract_path: Directory the ``image_<i>.jpg`` files are written to
    :param database_name: Name of the database, used in the progress bar label
    :param _: Unused; present only to keep the same interface as ``_unzip``
    :raises ValueError: If the magic number is not 2051
    """
    # Read the header and the raw pixel bytes
    with open(save_path, 'rb') as f, gzip.GzipFile(fileobj=f) as bytestream:
        magic = _read32(bytestream)
        if magic != 2051:
            raise ValueError('Invalid magic number {} in file: {}'.format(magic, f.name))
        num_images, rows, cols = (_read32(bytestream) for _field in range(3))
        buf = bytestream.read(rows * cols * num_images)

    data = np.frombuffer(buf, dtype=np.uint8).reshape(num_images, rows, cols)

    # Write every image as a grayscale JPEG, with a progress bar
    progress = tqdm(data, unit='File', unit_scale=True, miniters=1,
                    desc='Extracting {}'.format(database_name))
    for image_i, image in enumerate(progress):
        target = os.path.join(extract_path, 'image_{}.jpg'.format(image_i))
        Image.fromarray(image, 'L').save(target)
hide_code
# https://github.com/udacity/deep-learning/blob/master/face_generation/helper.py
def get_image(image_path, width, height, mode):
    """Load an image file as a NumPy array in the requested mode.

    An image whose size differs from (width, height) is centre-cropped to
    108x108 and then resized to (width, height) with bilinear filtering.

    :param image_path: Path of image
    :param width: Target width of image
    :param height: Target height of image
    :param mode: PIL mode the image is converted to
    :return: Image data as a NumPy array
    """
    image = Image.open(image_path)

    # HACK - a size mismatch is taken to mean the image is from the CELEBA dataset
    if image.size != (width, height):
        # Keep the central square, which drops most pixels that aren't part of a face
        face_width = face_height = 108
        left = (image.size[0] - face_width) // 2
        top = (image.size[1] - face_height) // 2
        crop_box = [left, top, left + face_width, top + face_height]
        image = image.crop(crop_box).resize([width, height], Image.BILINEAR)

    return np.array(image.convert(mode))
def get_batch(image_files, width, height, mode):
    """Load several image files into one float32 array.

    :param image_files: Paths of the images to load
    :param width: Target width passed to ``get_image``
    :param height: Target height passed to ``get_image``
    :param mode: PIL mode passed to ``get_image``
    :return: float32 array with at least 4 dimensions (a trailing axis of
        size 1 is appended when the stacked images have fewer)
    """
    loaded = [get_image(path, width, height, mode) for path in image_files]
    data_batch = np.array(loaded).astype(np.float32)

    # Make sure the images are in 4 dimensions
    if data_batch.ndim < 4:
        data_batch = data_batch.reshape(data_batch.shape + (1,))

    return data_batch
hide_code
# https://github.com/udacity/deep-learning/blob/master/face_generation/helper.py
def images_square_grid(images, mode):
    """Arrange a batch of images into one square grid image.

    Only the first ``floor(sqrt(n)) ** 2`` images are used. Pixel values are
    rescaled so the batch minimum maps to 0 and the batch maximum to 255.

    :param images: 4-dimensional array of images to be used for the grid
    :param mode: The PIL mode to use for images
    :return: PIL image of the images in a square grid
    """
    # Get maximum size for square grid of images
    save_size = math.floor(np.sqrt(images.shape[0]))

    # Scale to 0-255. A constant-valued batch has no range to stretch, so it
    # is rendered as all zeros instead of dividing by zero.
    lowest, highest = images.min(), images.max()
    value_range = highest - lowest
    if value_range == 0:
        images = np.zeros(images.shape, dtype=np.uint8)
    else:
        images = (((images - lowest) * 255) / value_range).astype(np.uint8)

    # Put images in a square arrangement
    images_in_square = np.reshape(
        images[:save_size * save_size],
        (save_size, save_size, images.shape[1], images.shape[2], images.shape[3]))
    if mode == 'L':
        # Single-channel images are pasted as 2-D arrays
        images_in_square = np.squeeze(images_in_square, 4)

    # Combine images to grid image
    new_im = Image.new(mode, (images.shape[1] * save_size, images.shape[2] * save_size))
    for col_i, col_images in enumerate(images_in_square):
        for image_i, image in enumerate(col_images):
            im = Image.fromarray(image, mode)
            new_im.paste(im, (col_i * images.shape[1], image_i * images.shape[2]))

    return new_im
hide_code
# https://github.com/udacity/deep-learning/blob/master/face_generation/helper.py
def download_extract(database_name, data_path):
    """Download a database archive, verify its MD5 checksum and extract it.

    Nothing is done when the extraction directory already exists. The
    downloaded archive is deleted after a successful extraction.

    :param database_name: Database name, either 'celeba' or 'mnist'
    :param data_path: Directory that holds the archive and the extracted data
    :raises ValueError: If database_name is not a known database
    :raises AssertionError: If the archive's MD5 checksum does not match
    """
    DATASET_CELEBA_NAME = 'celeba'
    DATASET_MNIST_NAME = 'mnist'

    if database_name == DATASET_CELEBA_NAME:
        url = 'https://s3-us-west-1.amazonaws.com/udacity-dlnfd/datasets/celeba.zip'
        hash_code = '00d2c5bc6d35e252742224ab0c1e8fcb'
        extract_path = os.path.join(data_path, 'img_align_celeba')
        save_path = os.path.join(data_path, 'celeba.zip')
        extract_fn = _unzip
    elif database_name == DATASET_MNIST_NAME:
        url = 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz'
        hash_code = 'f68b3c2dcbeaaa9fbdd348bbdeb94873'
        extract_path = os.path.join(data_path, 'mnist')
        save_path = os.path.join(data_path, 'train-images-idx3-ubyte.gz')
        extract_fn = _ungzip
    else:
        # Fail with a clear message instead of an UnboundLocalError further down
        raise ValueError('Unknown database name: {!r}'.format(database_name))

    if os.path.exists(extract_path):
        print('Found {} Data'.format(database_name))
        return

    if not os.path.exists(data_path):
        os.makedirs(data_path)

    if not os.path.exists(save_path):
        with DLProgress(unit='B', unit_scale=True, miniters=1,
                        desc='Downloading {}'.format(database_name)) as pbar:
            urlretrieve(url, save_path, pbar.hook)

    # Hash the archive in chunks so it is never held in memory as a whole,
    # and close the file handle deterministically
    md5 = hashlib.md5()
    with open(save_path, 'rb') as archive:
        for chunk in iter(lambda: archive.read(1 << 20), b''):
            md5.update(chunk)
    assert md5.hexdigest() == hash_code, \
        '{} file is corrupted. Remove the file and try again.'.format(save_path)

    os.makedirs(extract_path)
    try:
        extract_fn(save_path, extract_path, database_name, data_path)
    except Exception:
        shutil.rmtree(extract_path)  # Remove extraction folder if there is an error
        raise

    # Remove compressed data
    os.remove(save_path)
hide_code
# https://github.com/udacity/deep-learning/blob/master/face_generation/helper.py
class Dataset(object):
    """A list of image files together with the shape and mode used to load them."""

    def __init__(self, dataset_name, data_files):
        """Initialize the class

        :param dataset_name: Database name, either 'celeba' or 'mnist'
        :param data_files: List of files in the database
        :raises ValueError: If dataset_name is not a known database
        """
        DATASET_CELEBA_NAME = 'celeba'
        DATASET_MNIST_NAME = 'mnist'
        IMAGE_WIDTH = 28
        IMAGE_HEIGHT = 28

        if dataset_name == DATASET_CELEBA_NAME:
            self.image_mode = 'RGB'
            image_channels = 3
        elif dataset_name == DATASET_MNIST_NAME:
            self.image_mode = 'L'
            image_channels = 1
        else:
            # Fail with a clear message instead of an UnboundLocalError below
            raise ValueError('Unknown dataset name: {!r}'.format(dataset_name))

        self.data_files = data_files
        # (number of files, width, height, channels)
        self.shape = len(data_files), IMAGE_WIDTH, IMAGE_HEIGHT, image_channels

    def get_batches(self, batch_size):
        """Generate batches

        Files left over after the last full batch are not yielded.

        :param batch_size: Batch Size, must be at least 1
        :return: Generator of batches with pixel values shifted to [-0.5, 0.5]
        :raises ValueError: If batch_size is smaller than 1 (raised on first iteration)
        """
        if batch_size < 1:
            # A non-positive step would never advance the index below
            raise ValueError('batch_size must be at least 1, got {}'.format(batch_size))

        IMAGE_MAX_VALUE = 255

        current_index = 0
        while current_index + batch_size <= self.shape[0]:
            data_batch = get_batch(
                self.data_files[current_index:current_index + batch_size],
                *self.shape[1:3],
                self.image_mode)

            current_index += batch_size

            yield data_batch / IMAGE_MAX_VALUE - 0.5
hide_code
# https://github.com/udacity/deep-learning/blob/master/face_generation/helper.py
class DLProgress(tqdm):
    """Progress bar that can be driven by a download report hook."""
    # Block count seen on the previous hook call
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        """Advance the bar by the bytes transferred since the previous call.

        Intended to be called once when the network connection is established
        and once after each block read thereafter.

        :param block_num: A count of blocks transferred so far
        :param block_size: Block size in bytes
        :param total_size: The total size of the file. This may be -1 on older FTP servers
            which do not return a file size in response to a retrieval request.
        """
        self.total = total_size
        new_blocks = block_num - self.last_block
        self.update(new_blocks * block_size)
        self.last_block = block_num
hide_code
# https://github.com/udacity/deep-learning/blob/master/face_generation/problem_unittests.py
def test_safe(func):
    """Decorator that isolates a test by running it inside a fresh TensorFlow graph.

    'Tests Passed' is printed after the wrapped function returns without raising.

    :param func: Test function to wrap
    :return: Wrapper that forwards its arguments to func and returns func's result
    """
    # Imported locally so this helper does not rely on a file-level import
    import functools

    @functools.wraps(func)  # keep the wrapped test's name and docstring
    def func_wrapper(*args, **kwargs):
        with tf.Graph().as_default():
            result = func(*args, **kwargs)
        print('Tests Passed')
        return result

    return func_wrapper
def _assert_tensor_shape(tensor, shape, display_name):
    """Assert that a tensor has the expected rank and dimensions.

    :param tensor: Tensor to check
    :param shape: Expected shape; a ``None`` entry matches any size
    :param display_name: Name of the tensor used in failure messages
    """
    rank_message = '{} has wrong rank'.format(display_name)
    assert tf.assert_rank(tensor, len(shape), message=rank_message)

    if len(shape):
        tensor_shape = tensor.get_shape().as_list()
    else:
        tensor_shape = []

    # Collect every dimension that differs from a non-None expectation
    wrong_dimension = []
    for ten_dim, cor_dim in zip(tensor_shape, shape):
        if cor_dim is not None and ten_dim != cor_dim:
            wrong_dimension.append(ten_dim)

    assert not wrong_dimension, \
        '{} has wrong shape. Found {}'.format(display_name, tensor_shape)
def _check_input(tensor, shape, display_name, tf_name=None):
    """Assert that a tensor is a placeholder with the expected shape and name.

    :param tensor: Tensor to check
    :param shape: Expected shape; a ``None`` entry matches any size
    :param display_name: Name of the tensor used in failure messages
    :param tf_name: Expected TensorFlow name; not checked when falsy
    """
    assert tensor.op.type == 'Placeholder', \
        '{} is not a Placeholder.'.format(display_name)

    # Report shape failures under the tensor's own name rather than a fixed label
    _assert_tensor_shape(tensor, shape, display_name)

    if tf_name:
        assert tensor.name == tf_name, \
            '{} has bad name. Found name {}'.format(display_name, tensor.name)
hide_code
# https://github.com/udacity/deep-learning/blob/master/face_generation/problem_unittests.py
class TmpMock:
    """Replace an attribute with a MagicMock and put the saved value back on exit.

    The replacement happens when the object is constructed; entering the
    context returns the mock that is currently installed.
    """

    def __init__(self, module, attrib_name):
        """Save a deep copy of the attribute, then install a MagicMock in its place."""
        saved = deepcopy(getattr(module, attrib_name))
        setattr(module, attrib_name, mock.MagicMock())
        self.original_attrib = saved
        self.module = module
        self.attrib_name = attrib_name

    def __enter__(self):
        """Return the attribute currently installed on the module (the mock)."""
        return getattr(self.module, self.attrib_name)

    def __exit__(self, type, value, traceback):
        """Put the saved attribute back; exceptions are not suppressed."""
        setattr(self.module, self.attrib_name, self.original_attrib)
hide_code
# https://github.com/udacity/deep-learning/blob/master/face_generation/problem_unittests.py
@test_safe
def test_model_inputs(model_inputs):
    """Check that model_inputs returns three placeholders with the expected shapes.

    :param model_inputs: The function under test
    """
    image_width, image_height, image_channels, z_dim = 28, 28, 3, 100
    placeholders = model_inputs(image_width, image_height, image_channels, z_dim)
    input_real, input_z, learn_rate = placeholders

    expectations = (
        (input_real, [None, image_width, image_height, image_channels], 'Real Input'),
        (input_z, [None, z_dim], 'Z Input'),
        (learn_rate, [], 'Learning Rate'),
    )
    for tensor, expected_shape, label in expectations:
        _check_input(tensor, expected_shape, label)
@test_safe
def test_discriminator(discriminator, tf_module):
    """Check the discriminator's output shapes and its use of tf.variable_scope.

    :param discriminator: The function under test
    :param tf_module: Module whose ``variable_scope`` attribute is mocked
    """
    expected_shape = [None, 1]
    with TmpMock(tf_module, 'variable_scope') as scope_mock:
        image = tf.placeholder(tf.float32, [None, 28, 28, 3])

        # First call: default arguments, variables must not be reused
        first_output, first_logits = discriminator(image)
        _assert_tensor_shape(first_output, expected_shape, 'Discriminator Training(reuse=false) output')
        _assert_tensor_shape(first_logits, expected_shape, 'Discriminator Training(reuse=false) Logits')
        assert scope_mock.called, \
            'tf.variable_scope not called in Discriminator Training(reuse=false)'
        expected_call = mock.call('discriminator', reuse=False)
        assert scope_mock.call_args == expected_call, \
            'tf.variable_scope called with wrong arguments in Discriminator Training(reuse=false)'

        scope_mock.reset_mock()

        # Second call: reuse requested positionally
        second_output, second_logits = discriminator(image, True)
        _assert_tensor_shape(second_output, expected_shape, 'Discriminator Inference(reuse=True) output')
        _assert_tensor_shape(second_logits, expected_shape, 'Discriminator Inference(reuse=True) Logits')
        assert scope_mock.called, \
            'tf.variable_scope not called in Discriminator Inference(reuse=True)'
        expected_call = mock.call('discriminator', reuse=True)
        assert scope_mock.call_args == expected_call, \
            'tf.variable_scope called with wrong arguments in Discriminator Inference(reuse=True)'
@test_safe
def test_generator(generator, tf_module):
    """Check the generator's output shape and its use of tf.variable_scope.

    :param generator: The function under test
    :param tf_module: Module whose ``variable_scope`` attribute is mocked
    """
    with TmpMock(tf_module, 'variable_scope') as scope_mock:
        z = tf.placeholder(tf.float32, [None, 100])
        out_channel_dim = 5
        expected_shape = [None, 28, 28, out_channel_dim]

        # First call: default is_train, variables must not be reused
        train_output = generator(z, out_channel_dim)
        _assert_tensor_shape(train_output, expected_shape, 'Generator output (is_train=True)')
        assert scope_mock.called, \
            'tf.variable_scope not called in Generator Training(reuse=false)'
        expected_call = mock.call('generator', reuse=False)
        assert scope_mock.call_args == expected_call, \
            'tf.variable_scope called with wrong arguments in Generator Training(reuse=false)'

        scope_mock.reset_mock()

        # Second call: is_train=False, variables must be reused
        infer_output = generator(z, out_channel_dim, False)
        _assert_tensor_shape(infer_output, expected_shape, 'Generator output (is_train=False)')
        assert scope_mock.called, \
            'tf.variable_scope not called in Generator Inference(reuse=True)'
        expected_call = mock.call('generator', reuse=True)
        assert scope_mock.call_args == expected_call, \
            'tf.variable_scope called with wrong arguments in Generator Inference(reuse=True)'
@test_safe
def test_model_loss(model_loss):
    """Check that model_loss returns two scalar loss tensors.

    :param model_loss: The function under test
    """
    out_channel_dim = 4
    input_real = tf.placeholder(tf.float32, [None, 28, 28, out_channel_dim])
    input_z = tf.placeholder(tf.float32, [None, 100])

    d_loss, g_loss = model_loss(input_real, input_z, out_channel_dim)

    _assert_tensor_shape(d_loss, [], 'Discriminator Loss')
    # Check the generator loss itself, not the discriminator loss a second time
    _assert_tensor_shape(g_loss, [], 'Generator Loss')
@test_safe
def test_model_opt(model_opt, tf_module):
    """Check that model_opt returns two operations and queries tf.trainable_variables.

    :param model_opt: The function under test
    :param tf_module: Module whose ``trainable_variables`` attribute is mocked
    """
    with TmpMock(tf_module, 'trainable_variables') as mock_trainable_variables:
        with tf.variable_scope('discriminator'):
            discriminator_logits = tf.Variable(tf.zeros([3, 3]))
        with tf.variable_scope('generator'):
            generator_logits = tf.Variable(tf.zeros([3, 3]))

        # The mocked lookup hands back exactly one variable per scope
        mock_trainable_variables.return_value = [discriminator_logits, generator_logits]
        labels = [[0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 0.0]]
        d_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=discriminator_logits,
            labels=labels))
        g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=generator_logits,
            labels=labels))
        learning_rate = 0.001
        beta1 = 0.9

        # Unpacking verifies that exactly two operations are returned
        d_train_opt, g_train_opt = model_opt(d_loss, g_loss, learning_rate, beta1)
        assert mock_trainable_variables.called, \
            'tf.trainable_variables not called'
You'll be using two datasets in this project: MNIST and CelebA.
Since the celebA dataset is complex and you're doing GANs in a project for the first time, we want you to test your neural network on MNIST before CelebA. Running the GANs on MNIST will allow you to see how well your model trains sooner.
If you're using FloydHub, set `data_dir` to "/input" and use the FloydHub data ID "R5KrjnANiKVhLWAkpXhNBe".
hide_code
# Directory that holds the downloaded archives and the extracted image folders
data_dir = 'data'
# FloydHub - Use with data ID "R5KrjnANiKVhLWAkpXhNBe"
# data_dir = '/input'
"""DON'T MODIFY ANYTHING IN THIS CELL"""
# Each call returns early when the extracted folder already exists under data_dir
download_extract('mnist', data_dir)
download_extract('celeba', data_dir)
hide_code
# Number of example images to display; also used by the CelebA preview cell
show_n_images = 25
"""DON'T MODIFY ANYTHING IN THIS CELL"""
# Load the first show_n_images MNIST files returned by glob as 28x28 grayscale images
mnist_images = get_batch(glob(os.path.join(data_dir, 'mnist/*.jpg'))[:show_n_images], 28, 28, 'L')
# Show them as one square grid; the trailing semicolon suppresses the cell's text output
pyplot.imshow(images_square_grid(mnist_images, 'L'), cmap=cm.bone);
The CelebFaces Attributes Dataset (CelebA) contains over 200,000 celebrity images with annotations. Since you're going to be generating faces, you won't need the annotations. You can change how many examples are shown by changing `show_n_images`.
hide_code
"""DON'T MODIFY ANYTHING IN THIS CELL"""
# Load the first show_n_images CelebA files returned by glob as 28x28 RGB images
celeba_images = get_batch(glob(os.path.join(data_dir, 'img_align_celeba/*.jpg'))[:show_n_images], 28, 28, 'RGB')
# Show them as one square grid; the trailing semicolon suppresses the cell's text output
pyplot.imshow(images_square_grid(celeba_images, 'RGB'));
Since the project's main focus is on building the GANs, we'll preprocess the data for you.
The MNIST images are black and white images with a single color channel while the CelebA images have 3 color channels (RGB color channel).
You'll build the components necessary to build a GAN by implementing the following functions:
model_inputs
discriminator
generator
model_loss
model_opt
train
This will check to make sure you have the correct version of TensorFlow and access to a GPU
hide_code
"""DON'T MODIFY ANYTHING IN THIS CELL"""
# Refuse to continue on a TensorFlow release older than 1.0
assert not (LooseVersion(tf.__version__) < LooseVersion('1.0')), \
    'Please use TensorFlow version 1.0 or newer. You are using {}'.format(tf.__version__)
print('TensorFlow Version: {}'.format(tf.__version__))

# Report the default GPU device, or warn when TensorFlow finds none
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
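Implement the `model_inputs` function to create TF Placeholders for the Neural Network. It should create the following placeholders:
- Real input images placeholder with rank 4 using `image_width`, `image_height`, and `image_channels`.
- Z input placeholder with rank 2 using `z_dim`.
- Learning rate placeholder with rank 0.

Return the placeholders in the following tuple: (tensor of real input images, tensor of z data, learning rate)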
hide_code
# Default `alpha` of every network builder and of model_loss: the leaky-ReLU slope,
# which model_loss also uses as the label-smoothing amount for real images.
alpha0 = 0.1 # for leaky_relu activation
# Standard deviation of the random-normal kernel initializers of the (transposed) conv layers.
stddev0 = 0.01 # for initializers
def model_inputs(image_width, image_height, image_channels, z_dim):
    """
    Build the three float32 placeholders that feed the model.

    :param image_width: The input image width
    :param image_height: The input image height
    :param image_channels: The number of image channels
    :param z_dim: The dimension of Z
    :return: Tuple of (tensor of real input images, tensor of z data, learning rate)
    """
    # Rank 4: any batch size, then width, height and channels
    real_shape = [None, image_width, image_height, image_channels]
    input_real = tf.placeholder(tf.float32, shape=real_shape, name="Real_Input")

    # Rank 2: any batch size, then the latent dimension
    z_shape = [None, z_dim]
    input_z = tf.placeholder(tf.float32, shape=z_shape, name="Z_Input")

    # Rank 0: a scalar
    input_learning_rate = tf.placeholder(tf.float32, shape=[], name="Learning_Rate")

    return (input_real, input_z, input_learning_rate)
"""DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE"""
test_model_inputs(model_inputs)
Implement `discriminator` to create a discriminator neural network that discriminates on `images`. This function should be able to reuse the variables in the neural network. Use `tf.variable_scope` with a scope name of "discriminator" to allow the variables to be reused. The function should return a tuple of (tensor output of the discriminator, tensor logits of the discriminator).
hide_code
def discriminator(images, reuse=False, alpha=alpha0):
    """
    Build the ReLU variant of the discriminator network.

    Three stride-2 5x5 convolutions with ReLU activation (the last two each
    followed by batch normalization) feed a single-unit dense layer.

    :param images: Tensor of input image(s)
    :param reuse: Boolean if the weights should be reused
    :param alpha: Unused here; accepted so the signature matches discriminator_l
    :return: Tuple of (tensor output of the discriminator, tensor logits of the discriminator)
    """
    # (number of filters, whether batch normalization follows the convolution)
    conv_plan = ((32, False), (96, True), (128, True))

    with tf.variable_scope('discriminator', reuse=reuse):
        x = images
        for filters, normalize in conv_plan:
            x = tf.layers.conv2d(
                x, filters, 5,
                strides=2,
                padding='same',
                activation=tf.nn.relu,
                kernel_initializer=tf.random_normal_initializer(stddev=stddev0))
            if normalize:
                x = tf.layers.batch_normalization(x, training=True)

        # The flatten size is hard-coded for a 4x4x128 feature map
        flattened = tf.reshape(x, (-1, 4*4*128))
        discriminator_logits = tf.layers.dense(flattened, 1)
        discriminator_outputs = tf.sigmoid(discriminator_logits)

    return discriminator_outputs, discriminator_logits
"""DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE"""
test_discriminator(discriminator, tf)
hide_code
def discriminator_l(images, reuse=False, alpha=alpha0):
    """
    Build the leaky-ReLU variant of the discriminator network.

    Three stride-2 5x5 convolutions, each followed by a leaky ReLU (the last
    two additionally by batch normalization), feed a single-unit dense layer.

    :param images: Tensor of input image(s)
    :param reuse: Boolean if the weights should be reused
    :param alpha: Slope of the leaky ReLU for negative inputs
    :return: Tuple of (tensor output of the discriminator, tensor logits of the discriminator)
    """
    # (number of filters, whether batch normalization follows the activation)
    conv_plan = ((32, False), (96, True), (128, True))

    with tf.variable_scope('discriminator', reuse=reuse):
        x = images
        for filters, normalize in conv_plan:
            x = tf.layers.conv2d(
                x, filters, 5,
                strides=2,
                padding='same',
                kernel_initializer=tf.random_normal_initializer(stddev=stddev0))
            x = tf.maximum(x * alpha, x)  # leaky_relu activation
            if normalize:
                x = tf.layers.batch_normalization(x, training=True)

        # The flatten size is hard-coded for a 4x4x128 feature map
        flattened = tf.reshape(x, (-1, 4*4*128))
        discriminator_logits = tf.layers.dense(flattened, 1)
        discriminator_outputs = tf.sigmoid(discriminator_logits)

    return discriminator_outputs, discriminator_logits
"""DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE"""
test_discriminator(discriminator_l, tf)
Implement `generator` to generate an image using `z`. This function should be able to reuse the variables in the neural network. Use `tf.variable_scope` with a scope name of "generator" to allow the variables to be reused. The function should return the generated 28 x 28 x `out_channel_dim` images.
hide_code
def generator(z, out_channel_dim, is_train=True, alpha=alpha0):
    """
    Build the ReLU variant of the generator network.

    A dense layer reshaped to 7x7x128 is upsampled by two stride-2 transposed
    convolutions, then mapped to out_channel_dim channels and squashed by tanh.
    Variables are reused whenever is_train is False.

    :param z: Input z
    :param out_channel_dim: The number of channels in the output image
    :param is_train: Boolean if generator is being used for training
    :param alpha: Unused here; accepted so the signature matches generator_l
    :return: The tensor output of the generator
    """
    with tf.variable_scope('generator', reuse=(not is_train)):
        x = tf.layers.dense(z, 7*7*128, activation=tf.nn.relu)
        x = tf.reshape(x, (-1, 7, 7, 128))
        x = tf.layers.batch_normalization(x, training=is_train)

        # Each stride-2 transposed convolution is followed by batch normalization
        for filters in (96, 32):
            x = tf.layers.conv2d_transpose(
                x, filters, 5,
                strides=2,
                padding='same',
                activation=tf.nn.relu,
                kernel_initializer=tf.random_normal_initializer(stddev=stddev0))
            x = tf.layers.batch_normalization(x, training=is_train)

        # Stride-1 projection to the requested number of channels
        generator_logits = tf.layers.conv2d_transpose(
            x, out_channel_dim, 3,
            strides=1,
            padding='same',
            kernel_initializer=tf.random_normal_initializer(stddev=stddev0))
        generator_outputs = tf.tanh(generator_logits)

    return generator_outputs
"""DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE"""
test_generator(generator, tf)
hide_code
def generator_l(z, out_channel_dim, is_train=True, alpha=alpha0):
    """
    Build the leaky-ReLU variant of the generator network.

    A dense layer reshaped to 7x7x128 is upsampled by two stride-2 transposed
    convolutions, then mapped to out_channel_dim channels and squashed by tanh.
    Variables are reused whenever is_train is False.

    :param z: Input z
    :param out_channel_dim: The number of channels in the output image
    :param is_train: Boolean if generator is being used for training
    :param alpha: Slope of the leaky ReLU for negative inputs
    :return: The tensor output of the generator
    """
    with tf.variable_scope('generator', reuse=(not is_train)):
        x = tf.layers.dense(z, 7*7*128)
        x = tf.reshape(x, (-1, 7, 7, 128))
        x = tf.maximum(x * alpha, x)  # leaky_relu activation
        x = tf.layers.batch_normalization(x, training=is_train)

        # Each stride-2 transposed convolution is followed by leaky ReLU and batch normalization
        for filters in (96, 32):
            x = tf.layers.conv2d_transpose(
                x, filters, 5,
                strides=2,
                padding='same',
                kernel_initializer=tf.random_normal_initializer(stddev=stddev0))
            x = tf.maximum(x * alpha, x)  # leaky_relu activation
            x = tf.layers.batch_normalization(x, training=is_train)

        # Stride-1 projection to the requested number of channels
        generator_logits = tf.layers.conv2d_transpose(
            x, out_channel_dim, 3,
            strides=1,
            padding='same',
            kernel_initializer=tf.random_normal_initializer(stddev=stddev0))
        generator_outputs = tf.tanh(generator_logits)

    return generator_outputs
"""DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE"""
test_generator(generator_l, tf)
Implement `model_loss` to build the GAN for training and calculate the loss. The function should return a tuple of (discriminator loss, generator loss). Use the following functions you implemented:
discriminator(images, reuse=False)
generator(z, out_channel_dim, is_train=True)
hide_code
def model_loss(input_real, input_z, out_channel_dim, alpha=alpha0, index="leaky_relu"):
    """
    Get the loss for the discriminator and generator

    :param input_real: Images from the real dataset
    :param input_z: Z input
    :param out_channel_dim: The number of channels in the output image
    :param alpha: Passed to the networks as their `alpha`; also the amount by
        which the real-image labels are smoothed (they become 1 - alpha)
    :param index: Which network pair to build: "relu" uses generator and
        discriminator, "leaky_relu" uses generator_l and discriminator_l
    :return: A tuple of (discriminator loss, generator loss)
    :raises ValueError: If index is neither "relu" nor "leaky_relu"
    """
    if index == "relu":
        img_generator = generator(input_z, out_channel_dim, is_train=True, alpha=alpha)
        real_outputs, real_logits = discriminator(input_real, reuse=False, alpha=alpha)
        fake_outputs, fake_logits = discriminator(img_generator, reuse=True, alpha=alpha)
    elif index == "leaky_relu":
        img_generator = generator_l(input_z, out_channel_dim, is_train=True, alpha=alpha)
        real_outputs, real_logits = discriminator_l(input_real, alpha=alpha)
        fake_outputs, fake_logits = discriminator_l(img_generator, reuse=True, alpha=alpha)
    else:
        # Fail with a clear message instead of an UnboundLocalError further down
        raise ValueError('index must be "relu" or "leaky_relu", got {!r}'.format(index))

    # Smoothed "real" labels for the discriminator; hard labels for generated images
    real_labels = tf.ones_like(real_outputs) * (1 - alpha)
    zeros_labels = tf.zeros_like(fake_outputs)
    ones_labels = tf.ones_like(fake_outputs)

    # Discriminator: real images scored as real, generated images scored as fake
    real_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=real_logits, labels=real_labels))
    zeros_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=fake_logits, labels=zeros_labels))
    # Generator: generated images scored as real
    ones_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=fake_logits, labels=ones_labels))

    return real_loss + zeros_loss, ones_loss
"""DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE"""
test_model_loss(model_loss)
Implement `model_opt` to create the optimization operations for the GAN. Use `tf.trainable_variables` to get all the trainable variables. Filter the variables with names that are in the discriminator and generator scope names. The function should return a tuple of (discriminator training operation, generator training operation).
hide_code
def model_opt(discriminator_loss, generator_loss, learning_rate, beta1):
    """
    Get optimization operations

    Each network gets its own Adam optimizer that only updates the trainable
    variables whose names start with that network's scope name, and that runs
    after the UPDATE_OPS collected under the same scope name.

    :param discriminator_loss: Discriminator loss Tensor
    :param generator_loss: Generator loss Tensor
    :param learning_rate: Learning rate handed to both Adam optimizers
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :return: A tuple of (discriminator training operation, generator training operation)
    """
    def _named_under(items, scope_name):
        # Keep the graph elements whose name begins with the scope name
        return [item for item in items if item.name.startswith(scope_name)]

    all_variables = tf.trainable_variables()
    d_variables = _named_under(all_variables, 'discriminator')
    g_variables = _named_under(all_variables, 'generator')

    all_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    d_update_ops = _named_under(all_update_ops, 'discriminator')
    g_update_ops = _named_under(all_update_ops, 'generator')

    with tf.control_dependencies(d_update_ops):
        d_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1)
        discriminator_training_operations = d_optimizer.minimize(
            discriminator_loss, var_list=d_variables)

    with tf.control_dependencies(g_update_ops):
        g_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1)
        generator_training_operations = g_optimizer.minimize(
            generator_loss, var_list=g_variables)

    return discriminator_training_operations, generator_training_operations
"""DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE"""
test_model_opt(model_opt, tf)
hide_code
"""DON'T MODIFY ANYTHING IN THIS CELL"""
def show_generator_output(sess, n_images, input_z, out_channel_dim, image_mode):
    """
    Sample the leaky-ReLU generator in inference mode and plot the result as a grid.

    :param sess: TensorFlow session
    :param n_images: Number of Images to display
    :param input_z: Input Z Tensor
    :param out_channel_dim: The number of channels in the output image
    :param image_mode: The mode to use for images ("RGB" or "L")
    """
    if image_mode == 'RGB':
        cmap = None
    else:
        cmap = 'gray'

    # Draw one uniform random latent vector per image
    z_dim = input_z.get_shape().as_list()[-1]
    example_z = np.random.uniform(-1, 1, size=[n_images, z_dim])

    # is_train=False makes generator_l reuse the existing generator variables
    sample_op = generator_l(input_z, out_channel_dim, False)
    samples = sess.run(sample_op, feed_dict={input_z: example_z})

    pyplot.imshow(images_square_grid(samples, image_mode), cmap=cmap)
    pyplot.show()
Implement `train` to build and train the GAN. Use the following functions you implemented:
model_inputs(image_width, image_height, image_channels, z_dim)
model_loss(input_real, input_z, out_channel_dim)
model_opt(d_loss, g_loss, learning_rate, beta1)
Use `show_generator_output` to show the `generator` output while you train. Running `show_generator_output` for every batch will drastically increase training time and increase the size of the notebook. It's recommended to print the `generator` output every 100 batches.
hide_code
def train(epoch_count, batch_size, z_dim, learning_rate, beta1,
          get_batches, data_shape, data_image_mode, print_step, show_step):
    """
    Train the GAN

    Builds the leaky-ReLU model, alternates one discriminator step and one
    generator step per batch, prints the losses every print_step steps, shows
    generator samples every show_step steps and once more after the last
    epoch, and finally plots the recorded losses.

    :param epoch_count: Number of epochs
    :param batch_size: Batch Size
    :param z_dim: Z dimension
    :param learning_rate: Learning Rate
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :param get_batches: Function to get batches
    :param data_shape: Shape of the data
    :param data_image_mode: The image mode to use for images ("RGB" or "L")
    :param print_step: Print and record the losses every this many training steps
    :param show_step: Show generator samples every this many training steps
    """
    # Build Model
    input_real, input_z, input_learning_rate = \
        model_inputs(data_shape[1], data_shape[2], data_shape[3], z_dim)
    discriminator_loss, generator_loss = \
        model_loss(input_real, input_z, data_shape[3], alpha=alpha0, index="leaky_relu")
    # NOTE: the numeric learning_rate goes straight to the optimizers, so the
    # input_learning_rate placeholder is created but never fed.
    discriminator_training_operations, generator_training_operations = \
        model_opt(discriminator_loss, generator_loss, learning_rate, beta1)

    train_step = 0
    DTL, GTL = [], []  # recorded discriminator / generator losses

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch_i in range(epoch_count):
            for batch_images in get_batches(batch_size):
                # Train Model
                train_step += 1
                # Doubles the batch values (Dataset.get_batches yields values in [-0.5, 0.5])
                batch_images *= 2.0
                batch_z = np.random.uniform(-1, 1, size=(batch_size, z_dim))

                sess.run(discriminator_training_operations,
                         feed_dict={input_real: batch_images, input_z: batch_z})
                sess.run(generator_training_operations,
                         feed_dict={input_z: batch_z})

                if train_step % print_step == 0:
                    discriminator_training_loss = \
                        discriminator_loss.eval({input_real: batch_images, input_z: batch_z})
                    generator_training_loss = \
                        generator_loss.eval({input_z: batch_z})
                    # Report against this call's epoch_count, not a notebook global
                    print("Epoch {}/{}| Step {}|".format(epoch_i + 1, epoch_count, train_step),
                          "Discriminator Loss:{:.5f}|".format(discriminator_training_loss),
                          "Generator Loss:{:.5f}|".format(generator_training_loss),
                          "Discriminator Loss>Generator Loss: {}"
                          .format(discriminator_training_loss > generator_training_loss))
                    DTL.append(discriminator_training_loss)
                    GTL.append(generator_training_loss)

                if train_step % show_step == 0:
                    show_generator_output(sess, 25, input_z, data_shape[3], data_image_mode)

        # Final samples after the last epoch
        show_generator_output(sess, 25, input_z, data_shape[3], data_image_mode)

    pyplot.figure(figsize=(18, 6))
    pyplot.plot(DTL, label = 'discriminator')
    pyplot.plot(GTL, label = 'generator')
    pyplot.legend()
    pyplot.title('Loss Function');
Test your GANs architecture on MNIST. After 2 epochs, the GANs should be able to generate images that look like handwritten digits. Make sure the loss of the generator is lower than the loss of the discriminator or close to 0.
hide_code
# Hyperparameters, also picked up by the CelebA run that follows
batch_size, z_dim = 32, 128
learning_rate, beta1 = 0.0002, 0.5
"""DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE"""
epochs = 2
mnist_dataset = Dataset('mnist', glob(os.path.join(data_dir, 'mnist/*.jpg')))
# Train in a fresh graph; print losses every 50 steps, show samples every 500
with tf.Graph().as_default():
    train(
        epochs, batch_size, z_dim, learning_rate, beta1,
        mnist_dataset.get_batches,
        mnist_dataset.shape,
        mnist_dataset.image_mode,
        print_step=50,
        show_step=500)
Run your GANs on CelebA. It will take around 20 minutes on the average GPU to run one epoch. You can run the whole epoch or stop when it starts to generate realistic faces.
hide_code
# Hyperparameters are left as set earlier in the notebook; uncomment to override
# batch_size = 32
# z_dim = 128
# learning_rate = 0.0002
# beta1 = 0.5
"""DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE"""
epochs = 1
celeba_dataset = Dataset('celeba', glob(os.path.join(data_dir, 'img_align_celeba/*.jpg')))
# Train in a fresh graph; print losses every 50 steps, show samples every 500
with tf.Graph().as_default():
    train(
        epochs, batch_size, z_dim, learning_rate, beta1,
        celeba_dataset.get_batches,
        celeba_dataset.shape,
        celeba_dataset.image_mode,
        print_step=50,
        show_step=500)
When submitting this project, make sure to run all the cells before saving the notebook. Save the notebook file as "dlnd_face_generation.ipynb" and save it as an HTML file under "File" -> "Download as". Include the "helper.py" and "problem_unittests.py" files in your submission.