Deep Learning

Practice Projects

P5 Additional: Mixed Styles

A thorough, step-by-step explanation of the method: "Artistic Style Transfer" by Naoki Shibuya

In [1]:
%%html
<style>
@import url('https://fonts.googleapis.com/css?family=Orbitron|Roboto');
body {background-color: aliceblue;} 
a {color: #4876ff; font-family: 'Roboto';} 
h1 {color: #348ABD; font-family: 'Orbitron'; text-shadow: 4px 4px 4px #ccc;} 
h2, h3 {color: slategray; font-family: 'Roboto'; text-shadow: 4px 4px 4px #ccc;}
h4 {color: #348ABD; font-family: 'Orbitron';}
span {text-shadow: 4px 4px 4px #ccc;}
div.output_prompt, div.output_area pre {color: slategray;}
div.input_prompt, div.output_subarea {color: #4876ff;}      
div.output_stderr pre {background-color: aliceblue;}  
div.output_stderr {background-color: slategrey;}                        
</style>
<script>
code_show = true; 
// Toggle the visibility of the notebook's code cells.
// When `code_show` is true the call HIDES code: the first input cell and every
// input cell whose HTML contains the 'hide_code' marker are hidden and the
// output prompts are made transparent. Otherwise all input cells are shown and
// the prompts are restored. The flag is flipped at the end of every call, so
// consecutive calls alternate between the two states.
function code_display() {
    if (code_show) {
        $('div.input').each(function(id) {
            // id is the index of the matched element; 0 is the first input cell
            if (id == 0 || $(this).html().indexOf('hide_code') > -1) {$(this).hide();}
        });
        $('div.output_prompt').css('opacity', 0);
    } else {
        $('div.input').each(function(id) {$(this).show();});
        $('div.output_prompt').css('opacity', 1);
    };
    code_show = !code_show;
} 
$(document).ready(code_display);
</script>
<!-- Button that calls code_display() to show or hide the code cells.
     HTML has no backslash line continuation: attributes may simply wrap onto the next line. -->
<form action="javascript: code_display()">
<input style="color: #348ABD; background: aliceblue; opacity: 0.8;"
       type="submit" value="Click to display or hide code cells">
</form>
In [2]:
hide_code = ''
import numpy as np 
import math
import scipy.misc

import tensorflow as tf
import keras
import cv2

from tqdm import tqdm

import matplotlib.pylab as plt
from matplotlib import cm
%matplotlib inline
Using TensorFlow backend.

Display Images

In [3]:
hide_code
# Read from files and display images using OpenCV
def display_images(original, style, image_dir="images/"):
    """Show an original image and a style image side by side.

    Args:
        original: File name of the original (content) image inside ``image_dir``.
        style: File name of the style image inside ``image_dir``.
        image_dir: Path prefix that is concatenated with each file name.
            Defaults to ``"images/"``, the location used throughout the notebook.

    Raises:
        FileNotFoundError: If either file cannot be read. ``cv2.imread`` does not
            raise on failure; it returns ``None``, which would otherwise surface
            later as an unrelated ``AttributeError`` on ``.shape``.
    """
    def _read(filename):
        # Read one image and fail loudly when OpenCV could not decode it.
        path = image_dir + filename
        img = cv2.imread(path)
        if img is None:
            raise FileNotFoundError("Could not read image file: " + path)
        return img

    original_img = _read(original)
    style_img = _read(style)

    plt.figure(1, figsize=(18,6))
    plt.subplot(121)
    plt.title("Original image shape:  " + str(original_img.shape))
    # The images are converted from BGR to RGB before being handed to matplotlib.
    plt.imshow(cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB))
    plt.subplot(122)
    plt.title("Style image shape:  " + str(style_img.shape))
    plt.imshow(cv2.cvtColor(style_img, cv2.COLOR_BGR2RGB))
In [4]:
hide_code
# Pair 1: original image 02.png shown next to style image 04.png
display_images('02.png', '04.png')
In [5]:
hide_code
# Pair 2: original image 03.png shown next to style image 06.png
display_images('03.png', '06.png')

Preprocess

In [78]:
hide_code
# Create image arrays and resize the original to the style image's shape; Pair 1
img02 = cv2.imread("images/" + "02.png").astype('float32')
img04 = cv2.imread("images/" + "04.png").astype('float32')
# cv2.resize replaces scipy.misc.imresize, which is no longer available in
# current SciPy releases and rescaled float input through an 8-bit image.
# Note that cv2.resize expects dsize as (width, height), i.e. the reverse of
# ndarray.shape[:2]. INTER_LINEAR mirrors imresize's default bilinear filter.
img02 = cv2.resize(img02, (img04.shape[1], img04.shape[0]),
                   interpolation=cv2.INTER_LINEAR).astype('float32')
img02.shape, img04.shape
Out[78]:
((448, 640, 3), (448, 640, 3))
In [79]:
hide_code
# Create image arrays and resize the original to the style image's shape; Pair 2
img03 = cv2.imread("images/" + "03.png").astype('float32')
img06 = cv2.imread("images/" + "06.png").astype('float32')
# cv2.resize replaces scipy.misc.imresize, which is no longer available in
# current SciPy releases and rescaled float input through an 8-bit image.
# Note that cv2.resize expects dsize as (width, height), i.e. the reverse of
# ndarray.shape[:2]. INTER_LINEAR mirrors imresize's default bilinear filter.
img03 = cv2.resize(img03, (img06.shape[1], img06.shape[0]),
                   interpolation=cv2.INTER_LINEAR).astype('float32')
img03.shape, img06.shape
Out[79]:
((510, 313, 3), (510, 313, 3))
In [80]:
hide_code
# Preprocess images for VGG16
def preprocess(img):
    """Turn a single image array into a VGG16-ready batch of one.

    A leading batch axis is added and the data is copied, so the array handed
    to Keras never aliases the caller's ``img``. The batch is then passed
    through ``keras.applications.vgg16.preprocess_input``.

    NOTE(review): the notebook reads images with ``cv2.imread`` and displays
    them via ``COLOR_BGR2RGB``, so ``img`` is presumably in BGR order --
    confirm that this is the channel order ``preprocess_input`` expects.
    """
    batch = np.expand_dims(img, axis=0).copy()
    return keras.applications.vgg16.preprocess_input(batch)
In [81]:
hide_code
# Function for input tensors
def inputs(original_img, style_img):
    """Build the three graph inputs used for style transfer.

    Both images are run through ``preprocess`` and frozen as TensorFlow
    constants; the generated image is a float32 placeholder with the same
    shape as the original-image constant.

    Returns:
        Tuple ``(original_input, style_input, generated_input)``.
    """
    fixed_tensors = [tf.constant(preprocess(image)) for image in (original_img, style_img)]
    original_input, style_input = fixed_tensors
    generated_input = tf.placeholder(dtype=tf.float32, shape=original_input.shape)
    return original_input, style_input, generated_input
In [82]:
hide_code
# Create input tensors; Pair 1
original_input, style_input, generated_input = inputs(img02, img04)
# Stack the three images along the batch axis in a fixed order:
# index 0 = original, 1 = style, 2 = generated (the loss functions index layer outputs this way)
input_tensor = tf.concat([original_input, style_input, generated_input], axis=0)
input_tensor.shape
Out[82]:
TensorShape([Dimension(3), Dimension(448), Dimension(640), Dimension(3)])
In [83]:
hide_code
# Create input tensors; Pair 2
original_input2, style_input2, generated_input2 = inputs(img03, img06)
# Stack the three images along the batch axis in a fixed order:
# index 0 = original, 1 = style, 2 = generated (the loss functions index layer outputs this way)
input_tensor2 = tf.concat([original_input2, style_input2, generated_input2], axis=0)
input_tensor2.shape
Out[83]:
TensorShape([Dimension(3), Dimension(510), Dimension(313), Dimension(3)])

VGG16 Usage

In [84]:
hide_code
# Create the model using keras applications; Pair 1
# The network is built directly on top of the stacked 3-image tensor, so every
# layer output carries the original, style and generated features together.
# With include_top=False the summary below ends at block5_pool.
vgg16_model = keras.applications.vgg16.VGG16(input_tensor=input_tensor, include_top=False)
vgg16_model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_3 (InputLayer)         (3, 448, 640, 3)          0         
_________________________________________________________________
block1_conv1 (Conv2D)        (3, 448, 640, 64)         1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (3, 448, 640, 64)         36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (3, 224, 320, 64)         0         
_________________________________________________________________
block2_conv1 (Conv2D)        (3, 224, 320, 128)        73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (3, 224, 320, 128)        147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (3, 112, 160, 128)        0         
_________________________________________________________________
block3_conv1 (Conv2D)        (3, 112, 160, 256)        295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (3, 112, 160, 256)        590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (3, 112, 160, 256)        590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (3, 56, 80, 256)          0         
_________________________________________________________________
block4_conv1 (Conv2D)        (3, 56, 80, 512)          1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (3, 56, 80, 512)          2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (3, 56, 80, 512)          2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (3, 28, 40, 512)          0         
_________________________________________________________________
block5_conv1 (Conv2D)        (3, 28, 40, 512)          2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (3, 28, 40, 512)          2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (3, 28, 40, 512)          2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (3, 14, 20, 512)          0         
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________
In [85]:
hide_code
# Create the model using keras applications; Pair 2
# A second, separate VGG16 instance is built on the Pair 2 tensor.
# With include_top=False the summary below ends at block5_pool.
vgg16_model2 = keras.applications.vgg16.VGG16(input_tensor=input_tensor2, include_top=False)
vgg16_model2.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_4 (InputLayer)         (3, 510, 313, 3)          0         
_________________________________________________________________
block1_conv1 (Conv2D)        (3, 510, 313, 64)         1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (3, 510, 313, 64)         36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (3, 255, 156, 64)         0         
_________________________________________________________________
block2_conv1 (Conv2D)        (3, 255, 156, 128)        73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (3, 255, 156, 128)        147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (3, 127, 78, 128)         0         
_________________________________________________________________
block3_conv1 (Conv2D)        (3, 127, 78, 256)         295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (3, 127, 78, 256)         590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (3, 127, 78, 256)         590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (3, 63, 39, 256)          0         
_________________________________________________________________
block4_conv1 (Conv2D)        (3, 63, 39, 512)          1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (3, 63, 39, 512)          2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (3, 63, 39, 512)          2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (3, 31, 19, 512)          0         
_________________________________________________________________
block5_conv1 (Conv2D)        (3, 31, 19, 512)          2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (3, 31, 19, 512)          2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (3, 31, 19, 512)          2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (3, 15, 9, 512)           0         
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________
In [86]:
hide_code
# Index the layers of each VGG16 model by name so the loss functions can look them up directly
vgg16_layer_dict = dict((vgg_layer.name, vgg_layer) for vgg_layer in vgg16_model.layers)
vgg16_layer_dict2 = dict((vgg_layer.name, vgg_layer) for vgg_layer in vgg16_model2.layers)
In [87]:
hide_code
# Functions for loss calculation
def calculate_original_loss(layer_dict, original_layer_names):
    """Average content loss over the requested layers.

    For each named layer, the squared differences between the generated
    features (batch index 2) and the original features (batch index 0) are
    summed; the per-layer sums are then averaged.

    Args:
        layer_dict: Mapping from layer name to a layer exposing ``.output``.
        original_layer_names: Names of the layers to compare (must be non-empty,
            since the result is divided by its length).
    """
    K = keras.backend
    total = 0
    for layer_name in original_layer_names:
        activations = layer_dict[layer_name].output
        difference = activations[2, :, :, :] - activations[0, :, :, :]
        total = total + K.sum(K.square(difference))
    return total / len(original_layer_names)

def gram_matrix(x):
    """Return the Gram matrix of a single feature map.

    The last axis of ``x`` is moved to the front, each of its slices is
    flattened into a row, and the rows are multiplied with their own transpose.
    The result therefore has one row and one column per entry of ``x``'s last axis.
    """
    K = keras.backend
    channels_first = K.permute_dimensions(x, (2, 0, 1))
    flat = K.batch_flatten(channels_first)
    return K.dot(flat, K.transpose(flat))

def get_style_loss(style_features, generated_features, size=None, channels=3):
    """Style loss between two feature maps, based on their Gram matrices.

    Args:
        style_features: Feature map of the style image for one layer.
        generated_features: Feature map of the generated image for the same layer.
        size: Number of pixels (height * width) of the image being processed,
            used to normalise the loss. When omitted, the size of the global
            ``img04`` is used, which is only correct for Pair 1 and is kept
            solely for backward compatibility with direct callers.
        channels: Channel count used in the normalisation (default 3).

    Returns:
        Sum of squared Gram-matrix differences divided by
        ``4 * channels**2 * size**2``.
    """
    if size is None:
        # Legacy behaviour: normalise by the Pair 1 style image.
        size = img04.shape[0] * img04.shape[1]
    S = gram_matrix(style_features)
    G = gram_matrix(generated_features)
    return keras.backend.sum(keras.backend.square(S - G)) / (4. * (channels**2) * (size**2))


def _infer_image_size(layer_dict):
    """Return height * width of the model's input, or None if it cannot be determined."""
    for layer in layer_dict.values():
        # Matched by class name, as printed in the model summary ("InputLayer").
        if type(layer).__name__ != 'InputLayer':
            continue
        shape = keras.backend.int_shape(layer.output)
        # Expect (batch, height, width, channels) with static spatial dimensions.
        if shape is not None and len(shape) == 4 and None not in shape[1:3]:
            return shape[1] * shape[2]
    return None


def calculate_style_loss(layer_dict, style_layer_names, size=None):
    """Average style loss over the requested layers.

    Args:
        layer_dict: Mapping from layer name to a layer exposing ``.output``;
            batch index 1 holds the style features and index 2 the generated ones.
        style_layer_names: Names of the layers to compare (must be non-empty,
            since the result is divided by its length).
        size: Optional pixel count (height * width) used for normalisation.
            When omitted it is taken from the input layer found in
            ``layer_dict``, so each image pair is normalised by its own size
            instead of always by the Pair 1 image.
    """
    if size is None:
        size = _infer_image_size(layer_dict)
    loss = 0
    for name in style_layer_names:
        layer = layer_dict[name]
        style_features     = layer.output[1, :, :, :] # style features
        generated_features = layer.output[2, :, :, :] # generated features
        loss += get_style_loss(style_features, generated_features, size=size)
    return loss / len(style_layer_names)

def calculate_variation_loss(x):
    """Total-variation style penalty on a 4-D image batch.

    Each element (excluding the last row and column of axes 1 and 2) is
    compared with its neighbour one step further along axis 1 and along
    axis 2. The two squared differences are added, raised to the power 1.25
    and summed over everything.
    """
    K = keras.backend
    anchor = x[:, :-1, :-1, :]
    next_row = x[:, 1:, :-1, :]
    next_col = x[:, :-1, 1:, :]
    squared_steps = K.square(anchor - next_row) + K.square(anchor - next_col)
    return K.sum(K.pow(squared_steps, 1.25))
In [113]:
hide_code
# Calculate all losses; Pair 1
# Content ("original") loss: compares features of the single layer block5_conv2
original_loss = calculate_original_loss(vgg16_layer_dict, ['block5_conv2'])

# Style loss: averaged over the first convolution of each of the five blocks
style_layers = ['block1_conv1','block2_conv1','block3_conv1','block4_conv1', 'block5_conv1']
style_loss = calculate_style_loss(vgg16_layer_dict, style_layers)

# Variation loss: computed directly on the generated-image placeholder
variation_loss = calculate_variation_loss(generated_input)
In [114]:
hide_code
# Calculate all losses; Pair 2
# Content ("original") loss: compares features of the single layer block5_conv2
original_loss2 = calculate_original_loss(vgg16_layer_dict2, ['block5_conv2'])

# Style loss: averaged over the first convolution of each of the five blocks
# (style_layers is reassigned here with the same list as for Pair 1)
style_layers = ['block1_conv1','block2_conv1','block3_conv1','block4_conv1', 'block5_conv1']
style_loss2 = calculate_style_loss(vgg16_layer_dict2, style_layers)

# Variation loss: computed directly on the generated-image placeholder
variation_loss2 = calculate_variation_loss(generated_input2)
In [124]:
hide_code
### Pair 1 ###
# Loss and gradients calculation
# Weighted sum of the three losses: content 1.0, style 1.0, variation 0.1
loss = 1.0 * original_loss + 1.0 * style_loss + 0.1 * variation_loss
        
# Gradient of the total loss with respect to the generated-image placeholder
gradients = keras.backend.gradients(loss, generated_input)[0]
# Callable that maps a generated-image array to [loss value, gradient array]
calculate = keras.backend.function([generated_input], [loss, gradients])

# Preprocess the image
# The optimisation starts from the preprocessed original image
generated_data = preprocess(img02)
 
# Generate the new image 
# 10 plain gradient-descent steps with a fixed step size of 0.001;
# the loss value returned by calculate() is discarded.
for i in tqdm(range(10)):
    _, gradients_value = calculate([generated_data])
    generated_data -= gradients_value * 0.001
100%|██████████| 10/10 [14:34<00:00, 70.73s/it]
In [125]:
hide_code
### Pair 2 ###
# Loss and gradients calculation
# Weighted sum of the three losses: content 1.0, style 1.0, variation 0.1
loss2 = 1.0 * original_loss2 + 1.0 * style_loss2 + 0.1 * variation_loss2
        
# Gradient of the total loss with respect to the generated-image placeholder
gradients2 = keras.backend.gradients(loss2, generated_input2)[0]
# Callable that maps a generated-image array to [loss value, gradient array]
calculate2 = keras.backend.function([generated_input2], [loss2, gradients2])

# Preprocess the image
# The optimisation starts from the preprocessed original image
generated_data2 = preprocess(img03)
 
# Generate the new image 
# 10 plain gradient-descent steps with a fixed step size of 0.001;
# the loss value returned by calculate2() is discarded.
for i in tqdm(range(10)):
    _, gradients_value2 = calculate2([generated_data2])
    generated_data2 -= gradients_value2 * 0.001
100%|██████████| 10/10 [03:52<00:00, 22.43s/it]

Display Style Transfer

In [126]:
hide_code
# Save the result; Pair 1
# Written to the current working directory; still in preprocessed form (deprocess() is applied after loading)
np.save('generated_data_01.npy', generated_data)
In [127]:
hide_code
# Save the result; Pair 2
# Written to the current working directory; still in preprocessed form (deprocess() is applied after loading)
np.save('generated_data2_01.npy', generated_data2)
In [128]:
hide_code
# Load the result; Pair 1
generated_data_01 = np.load('generated_data_01.npy')
# NOTE(review): 'generated_data_02.npy' is never written by the cells visible in this
# notebook -- presumably it was saved by an earlier run with different settings;
# this cell fails on a fresh checkout unless that file already exists.
generated_data_02 = np.load('generated_data_02.npy')
In [129]:
hide_code
# Load the result; Pair 2
generated_data2_01 = np.load('generated_data2_01.npy')
# NOTE(review): 'generated_data2_02.npy' is never written by the cells visible in this
# notebook -- presumably it was saved by an earlier run with different settings;
# this cell fails on a fresh checkout unless that file already exists.
generated_data2_02 = np.load('generated_data2_02.npy')
In [121]:
hide_code
# Reverse of preprocessing
def deprocess(img):
    """Convert a preprocessed batch of one back into a displayable uint8 image.

    The first batch entry is copied (the input is left untouched), the
    per-channel offsets 103.939, 116.779 and 123.68 are added back to channels
    0, 1 and 2, the channel order is reversed, and the values are clipped to
    [0, 255] before being cast to ``uint8``.
    """
    channel_offsets = (103.939, 116.779, 123.68)
    restored = np.array(img[0], copy=True)
    for channel, offset in enumerate(channel_offsets):
        restored[:, :, channel] += offset
    reversed_channels = restored[:, :, ::-1]
    return np.clip(reversed_channels, 0, 255).astype('uint8')

Pair 1

In [130]:
hide_code
# Display the generated image
# Left: the result saved by this notebook's 10-step run; right: the second result file loaded above
generated_image01 = deprocess(generated_data_01)
generated_image02 = deprocess(generated_data_02)

plt.figure(1, figsize=(18,6))
plt.subplot(121)
plt.title("Loss function:  1.0 * original_loss + 1.0 * style_loss + 0.1 * variation_loss; 10 steps")
# deprocess() output is converted from BGR to RGB before it is handed to matplotlib
plt.imshow(cv2.cvtColor(generated_image01, cv2.COLOR_BGR2RGB))
plt.subplot(122)
# NOTE(review): the settings in this title (0.5 weight, 50 steps) are not produced by any visible cell -- confirm
plt.title("Loss function:  0.5 * original_loss + 1.0 * style_loss + 0.1 * variation_loss; 50 steps")
plt.imshow(cv2.cvtColor(generated_image02, cv2.COLOR_BGR2RGB));

Pair 2

In [133]:
hide_code
# Display the generated image
# Left: the result saved by this notebook's 10-step run; right: the second result file loaded above
generated_image201 = deprocess(generated_data2_01)
generated_image202 = deprocess(generated_data2_02)

plt.figure(1, figsize=(18,8))
plt.subplot(121)
plt.title("Loss function:  1.0 * original_loss + 1.0 * style_loss + 0.1 * variation_loss; 10 steps")
# deprocess() output is converted from BGR to RGB before it is handed to matplotlib
plt.imshow(cv2.cvtColor(generated_image201, cv2.COLOR_BGR2RGB))
plt.subplot(122)
# NOTE(review): the settings in this title (0.5 weight, 100 steps) are not produced by any visible cell -- confirm
plt.title("Loss function:  0.5 * original_loss + 1.0 * style_loss + 0.1 * variation_loss; 100 steps")
plt.imshow(cv2.cvtColor(generated_image202, cv2.COLOR_BGR2RGB));