%%html
<style>
@import url('https://fonts.googleapis.com/css?family=Orbitron|Roboto');
body {background-color: aliceblue;}
a {color: #4876ff; font-family: 'Roboto';}
h1 {color: #348ABD; font-family: 'Orbitron'; text-shadow: 4px 4px 4px #ccc;}
h2, h3 {color: slategray; font-family: 'Roboto'; text-shadow: 4px 4px 4px #ccc;}
h4 {color: #348ABD; font-family: 'Orbitron';}
span {text-shadow: 4px 4px 4px #ccc;}
div.output_prompt, div.output_area pre {color: slategray;}
div.input_prompt, div.output_subarea {color: #4876ff;}
div.output_stderr pre {background-color: aliceblue;}
div.output_stderr {background-color: slategrey;}
</style>
<script>
code_show = true;
function code_display() {
if (code_show) {
$('div.input').each(function(id) {
if (id == 0 || $(this).html().indexOf('hide_code') > -1) {$(this).hide();}
});
$('div.output_prompt').css('opacity', 0);
} else {
$('div.input').each(function(id) {$(this).show();});
$('div.output_prompt').css('opacity', 1);
};
code_show = !code_show;
}
$(document).ready(code_display);
</script>
<form action="javascript: code_display()">
<input style="color: #348ABD; background: aliceblue; opacity: 0.8;"
type="submit" value="Click to display or hide code cells">
</form>
hide_code = ''
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import ImageFile
from tqdm import tqdm
import h5py
import cv2
import matplotlib.pylab as plt
from matplotlib import cm
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from keras.utils import to_categorical
from keras.preprocessing import image as keras_image
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model, Model
from keras.layers import Input, BatchNormalization
from keras.layers import Dense, LSTM, GlobalAveragePooling1D, GlobalAveragePooling2D
from keras.layers import Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D
from keras.layers.advanced_activations import PReLU, LeakyReLU
from keras.applications.inception_v3 import InceptionV3, preprocess_input
import scipy
from scipy import misc
hide_code
# Plot the neural network fitting history
def history_plot(fit_history, n):
    """Plot training/validation loss and accuracy curves from a fit history.

    Args:
        fit_history: object whose ``history`` attribute is a dict of
            per-epoch lists (the value returned by ``model.fit`` here).
        n: number of leading epochs to leave out of the plots.
    """
    history = fit_history.history
    # Older Keras releases log accuracy as 'acc'/'val_acc', newer ones as
    # 'accuracy'/'val_accuracy'; use whichever key is present.
    acc_key = 'acc' if 'acc' in history else 'accuracy'
    panels = [(211, 'loss', 'Loss', 'Loss Function'),
              (212, acc_key, 'Accuracy', 'Accuracy')]
    plt.figure(figsize=(18, 12))
    for position, key, y_label, title in panels:
        plt.subplot(position)
        plt.plot(history[key][n:], color='slategray', label='train')
        plt.plot(history['val_' + key][n:], color='#4876ff', label='valid')
        plt.xlabel("Epochs")
        plt.ylabel(y_label)
        plt.legend()
        plt.title(title)
For this project, I have created a dataset of 1650 (50x33) color images (32x32x3) of 33 handwritten letters.
Run the following cell to load the dataset.
hide_code
# Function for processing an image
def image_to_tensor(img_path):
    """Load ``data/<img_path>`` resized to 32x32 and return it as a batch of one.

    A new leading axis is added to the array produced by ``img_to_array``
    so that per-image tensors can later be stacked along that axis.
    """
    full_path = "data/" + img_path
    picture = keras_image.load_img(full_path, target_size=(32, 32))
    pixels = keras_image.img_to_array(picture)
    return pixels[np.newaxis, ...]
# Function for creating the data tensor
def data_to_tensor(img_paths):
    """Convert each path with ``image_to_tensor`` and stack the results vertically.

    Progress over ``img_paths`` is reported through ``tqdm``.
    """
    stacked = []
    for img_path in tqdm(img_paths):
        stacked.append(image_to_tensor(img_path))
    return np.vstack(stacked)
# Allow PIL to load image files even when they are truncated
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Load the data: the CSV provides the file, letter, background and label columns
data = pd.read_csv("data/letters.csv")
files, letters, backgrounds = (
    data[column] for column in ('file', 'letter', 'background'))
targets = data['label'].values
tensors = data_to_tensor(files)
hide_code
# Print the shape
print('Tensor shape:', tensors.shape)
print('Target shape', targets.shape)
hide_code
# Read from files and display images using OpenCV
def display_images(img_path, ax):
    """Read ``data/<img_path>`` with OpenCV and show it on the axes ``ax``.

    Args:
        img_path: image file name relative to the ``data/`` directory.
        ax: axes object on which ``imshow`` is called.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file.
    """
    full_path = "data/" + img_path
    img = cv2.imread(full_path)
    # cv2.imread reports failure by returning None instead of raising; without
    # this check the problem would surface as an opaque error in cvtColor.
    if img is None:
        raise FileNotFoundError("Cannot read image: " + full_path)
    # The image is converted from BGR to RGB channel order before display.
    ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
# Show rows 0, 50, ..., 550 of the dataset in a 2x6 grid, each titled with its letter
fig = plt.figure(figsize=(18, 6))
for i in range(12):
    ax = fig.add_subplot(2, 6, i + 1,
                         title=letters[i * 50],
                         xticks=[], yticks=[])
    display_images(files[i * 50], ax)
The data tensors can be saved in the HDF5 (.h5) file format.
hide_code
# Create the tensor file
# The `with` block closes the file on exit, so no explicit close() is needed.
with h5py.File('LetterColorImages.h5', 'w') as f:
    f.create_dataset('images', data=tensors)
    f.create_dataset('labels', data=targets)
    f.create_dataset('backgrounds', data=backgrounds)
hide_code
# Read the h5 file
f = h5py.File('LetterColorImages.h5', 'r')
# List all groups
keys = list(f.keys())
keys
hide_code
# Create tensors and targets
# Datasets are selected by name rather than by position in `keys`, so the
# result does not depend on the order in which the file lists them.
tensors = np.array(f['images'])
targets = np.array(f['labels'])
# Both arrays are now in memory, so the file handle can be released.
f.close()
print('Tensor shape:', tensors.shape)
print('Target shape', targets.shape)
hide_code
# Normalize the tensors: cast to float32 and divide by 255
tensors = tensors.astype(np.float32) / 255
hide_code
# Read and display a tensor using Matplotlib
print('Label: ', letters[100])
plt.figure(figsize=(3, 3))
plt.imshow(tensors[100]);
Create tensors of grayscaled images and display their shape.
hide_code
# Grayscaled tensors: weighted sum of the first three channels
gray_tensors = tensors[..., :3].dot([0.299, 0.587, 0.114])
print('Grayscaled Tensor shape:', gray_tensors.shape)
hide_code
# Read and display a grayscaled tensor using Matplotlib
print('Label: ', letters[100])
plt.figure(figsize=(3, 3))
plt.imshow(gray_tensors[100], cmap=cm.bone);
Now we'll apply the one-hot encoding function to_categorical.
hide_code
# Print the target unique values
print(set(targets))
hide_code
# One-hot encode the targets; labels are shifted by one so they start from zero
cat_targets = to_categorical(targets - 1, 33)
cat_targets.shape
hide_code
# One-hot encode the background targets
backgrounds = to_categorical(backgrounds, 2)
backgrounds.shape
hide_code
# Create multi-label targets: letter columns followed by background columns
back_targets = np.concatenate([cat_targets, backgrounds], axis=1)
back_targets.shape
hide_code
# Split the data: hold out 20%, then halve the held-out part into validation and test
x_train, x_test, y_train, y_test = train_test_split(
    tensors, cat_targets, test_size=0.2, random_state=1)
n = len(x_test) // 2
x_valid, x_test = x_test[:n], x_test[n:]
y_valid, y_test = y_test[:n], y_test[n:]
hide_code
# Print the shape
x_train.shape, x_valid.shape, x_test.shape, y_train.shape, y_valid.shape, y_test.shape
hide_code
# Split the grayscaled data (same test size and seed as the color split)
x_train2, x_test2, y_train2, y_test2 = train_test_split(
    gray_tensors, cat_targets, test_size=0.2, random_state=1)
x_valid2, x_test2 = x_test2[:n], x_test2[n:]
y_valid2, y_test2 = y_test2[:n], y_test2[n:]
hide_code
# Reshape the grayscaled data to (-1, 32, 32, 1)
x_train2, x_test2, x_valid2 = (
    part.reshape(-1, 32, 32, 1) for part in (x_train2, x_test2, x_valid2))
hide_code
# Print the shape
x_train2.shape, x_valid2.shape, x_test2.shape, y_train2.shape, y_valid2.shape, y_test2.shape
hide_code
# Convert images from grayscaled to RGB
x_train2_tensor = tf.image.grayscale_to_rgb(x_train2)
x_test2_tensor = tf.image.grayscale_to_rgb(x_test2)
x_valid2_tensor = tf.image.grayscale_to_rgb(x_valid2)
# Run the TensorFlow session. The `with` block installs it as the default
# session for eval() and closes it afterwards so its resources are released.
with tf.Session() as sess:
    x_train2_color = x_train2_tensor.eval()
    x_test2_color = x_test2_tensor.eval()
    x_valid2_color = x_valid2_tensor.eval()
# Check the shape
x_train2_color.shape, x_test2_color.shape, x_valid2_color.shape
hide_code
# Split with multi-label targets (same test size and seed as the other splits)
x_train3, x_test3, y_train3, y_test3 = train_test_split(
    tensors, back_targets, test_size=0.2, random_state=1)
x_valid3, x_test3 = x_test3[:n], x_test3[n:]
y_valid3, y_test3 = y_test3[:n], y_test3[n:]
hide_code
# Print the shape
x_train3.shape, x_valid3.shape, x_test3.shape, y_train3.shape, y_valid3.shape, y_test3.shape
hide_code
# Create a list of targets per split: [first 33 columns (letters), remaining columns (backgrounds)]
# Both halves of each list are sliced from the same split, so inputs and
# targets stay aligned row by row.
y_train3_list = [y_train3[:, :33], y_train3[:, 33:]]
y_test3_list = [y_test3[:, :33], y_test3[:, 33:]]
y_valid3_list = [y_valid3[:, :33], y_valid3[:, 33:]]
hide_code
# Split the grayscaled data with multi-label targets
x_train4, x_test4, y_train4, y_test4 = train_test_split(
    gray_tensors, back_targets, test_size=0.2, random_state=1)
x_valid4, x_test4 = x_test4[:n], x_test4[n:]
y_valid4, y_test4 = y_test4[:n], y_test4[n:]
hide_code
# Reshape the grayscaled data to (-1, 32, 32, 1)
x_train4, x_test4, x_valid4 = (
    part.reshape(-1, 32, 32, 1) for part in (x_train4, x_test4, x_valid4))
hide_code
# Print the shape
x_train4.shape, x_valid4.shape, x_test4.shape, y_train4.shape, y_valid4.shape, y_test4.shape
hide_code
# Create a list of targets per split: [first 33 columns, remaining columns]
y_train4_list = [y_train4[:, 0:33], y_train4[:, 33:]]
y_test4_list = [y_test4[:, 0:33], y_test4[:, 33:]]
y_valid4_list = [y_valid4[:, 0:33], y_valid4[:, 33:]]
hide_code
def model():
    """Return a new Sequential model (template: architecture and compilation are still TODO)."""
    net = Sequential()
    # TODO: Define a model architecture
    # TODO: Compile the model
    return net


# Note: this rebinds the name `model` from the factory function to the built instance.
model = model()
hide_code
# Create callbacks: checkpoint the best weights and reduce the learning rate on plateau
checkpointer = ModelCheckpoint(
    filepath='weights.best.model.hdf5', save_best_only=True, verbose=2)
lr_reduction = ReduceLROnPlateau(
    monitor='val_loss', factor=0.2, patience=5, verbose=2)
hide_code
# Train the model
history = model.fit(
    x_train, y_train,
    validation_data=(x_valid, y_valid),
    batch_size=64, epochs=100, verbose=2,
    callbacks=[checkpointer, lr_reduction])
We should have an accuracy greater than 3%
hide_code
# Restore the weights saved by the checkpoint callback
model.load_weights(filepath='weights.best.model.hdf5')
# Evaluate the restored model on the testing set
score = model.evaluate(x=x_test, y=y_test)
score
Apply the ImageDataGenerator() function.
hide_code
# Fit the model with ImageDataGenerator()
# TODO: Tune these parameters. The values below are only a runnable starting
# point: one pass over the training set per epoch at batch size 64.
steps, epochs = len(x_train) // 64, 10
data_generator = ImageDataGenerator(zoom_range=0.2, shear_range=0.2, rotation_range=20)
generator = model.fit_generator(data_generator.flow(x_train, y_train, batch_size=64),
                                steps_per_epoch=steps, epochs=epochs,
                                validation_data=(x_valid, y_valid),
                                callbacks=[checkpointer, lr_reduction], verbose=2)
hide_code
# Restore the weights saved by the checkpoint callback
model.load_weights(filepath='weights.best.model.hdf5')
# Evaluate the restored model on the testing set
score = model.evaluate(x=x_test, y=y_test)
score
Let's compare the results with classifying algorithms.
hide_code
# Fit the classifier and get the accuracy score
# One-hot rows are converted back to class indices for scikit-learn
y_train_c = np.argmax(y_train, axis=1)
y_test_c = np.argmax(y_test, axis=1)
clf = GradientBoostingClassifier()
clf.fit(x_train.reshape(-1, 32*32*3), y_train_c)
clf.score(x_test.reshape(-1, 32*32*3), y_test_c)
hide_code
# Fit the classifier and get the accuracy score
clf2 = RandomForestClassifier()
clf2.fit(x_train.reshape(-1, 32*32*3), y_train_c)
clf2.score(x_test.reshape(-1, 32*32*3), y_test_c)
hide_code
def gray_model():
    """Return a new Sequential model for grayscaled images (template: architecture and compilation are still TODO)."""
    net = Sequential()
    # TODO: Define a model architecture
    # TODO: Compile the model
    return net


# Note: this rebinds the name `gray_model` from the factory function to the built instance.
gray_model = gray_model()
hide_code
# Create callbacks: checkpoint the best weights and reduce the learning rate on plateau
gray_checkpointer = ModelCheckpoint(
    filepath='weights.best.gray_model.hdf5', save_best_only=True, verbose=2)
gray_lr_reduction = ReduceLROnPlateau(
    monitor='val_loss', factor=0.8, patience=10, verbose=2)
hide_code
# Train the model
gray_history = gray_model.fit(
    x_train2, y_train2,
    validation_data=(x_valid2, y_valid2),
    batch_size=64, epochs=200, verbose=0,
    callbacks=[gray_checkpointer, gray_lr_reduction])
hide_code
# Plot the training history
history_plot(gray_history, 0)
Try to reach an accuracy greater than 50%
hide_code
# Restore the weights saved by the checkpoint callback
gray_model.load_weights(filepath='weights.best.gray_model.hdf5')
# Evaluate the restored model on the testing set
gray_score = gray_model.evaluate(x=x_test2, y=y_test2)
gray_score
Apply the ImageDataGenerator() function.
hide_code
# Fit the model with ImageDataGenerator()
# TODO: Tune these parameters. The values below are only a runnable starting
# point: one pass over the training set per epoch at batch size 64.
steps, epochs = len(x_train2) // 64, 10
data_generator = ImageDataGenerator(zoom_range=0.2, shear_range=0.2, rotation_range=20)
gray_generator = gray_model.fit_generator(data_generator.flow(x_train2, y_train2, batch_size=64),
                                          steps_per_epoch=steps, epochs=epochs,
                                          validation_data=(x_valid2, y_valid2),
                                          callbacks=[gray_checkpointer, gray_lr_reduction], verbose=2)
hide_code
# Restore the weights saved by the checkpoint callback
gray_model.load_weights(filepath='weights.best.gray_model.hdf5')
# Evaluate the restored model on the testing set
gray_score = gray_model.evaluate(x=x_test2, y=y_test2)
gray_score
Let's compare the results with classifying algorithms.
hide_code
# Fit the classifier and get the accuracy score
# One-hot rows are converted back to class indices for scikit-learn
y_train2_c = np.argmax(y_train2, axis=1)
y_test2_c = np.argmax(y_test2, axis=1)
clf = GradientBoostingClassifier()
clf.fit(x_train2.reshape(-1, 32*32), y_train2_c)
clf.score(x_test2.reshape(-1, 32*32), y_test2_c)
hide_code
# Fit the classifier and get the accuracy score
clf2 = RandomForestClassifier()
clf2.fit(x_train2.reshape(-1, 32*32), y_train2_c)
clf2.score(x_test2.reshape(-1, 32*32), y_test2_c)
hide_code
def multi_model():
    """Return a two-output model for color images (template).

    An input of shape (32, 32, 3) feeds a BatchNormalization layer, and two
    softmax Dense heads with 33 and 2 units are attached to its output.
    The intermediate architecture and the compilation are still TODO.
    """
    inputs = Input(shape=(32, 32, 3))
    x = BatchNormalization()(inputs)
    # TODO: Define a model architecture
    head_33 = Dense(33, activation='softmax')(x)
    head_2 = Dense(2, activation='softmax')(x)
    net = Model(inputs=inputs, outputs=[head_33, head_2])
    # TODO: Compile the model
    return net


# Note: this rebinds the name `multi_model` from the factory function to the built instance.
multi_model = multi_model()
hide_code
# Display the model architecture
multi_model.summary()
hide_code
# Create callbacks: checkpoint the best weights and reduce the learning rate on plateau
multi_checkpointer = ModelCheckpoint(
    filepath='weights.best.multi.hdf5', save_best_only=True, verbose=2)
multi_lr_reduction = ReduceLROnPlateau(
    monitor='val_loss', factor=0.8, patience=5, verbose=2)
hide_code
# Train the model on the two-element target lists
multi_history = multi_model.fit(
    x_train3, y_train3_list,
    validation_data=(x_valid3, y_valid3_list),
    batch_size=64, epochs=100, verbose=0,
    callbacks=[multi_checkpointer, multi_lr_reduction])
We should have an accuracy greater than 3% for the first target (letter) and greater than 50% for the second target (background).
hide_code
# Restore the weights saved by the checkpoint callback
multi_model.load_weights(filepath='weights.best.multi.hdf5')
# Evaluate the restored model on the testing set
multi_scores = multi_model.evaluate(x_test3, y_test3_list, verbose=0)
print("Scores: \n", multi_scores)
# NOTE(review): indices 3 and 4 are presumably the per-output accuracies;
# this depends on how the model is compiled — confirm.
print("First label. Accuracy: %.2f%%" % (multi_scores[3] * 100))
print("Second label. Accuracy: %.2f%%" % (multi_scores[4] * 100))
hide_code
def gray_multi_model():
    """Return a two-output model for grayscaled images (template).

    An input of shape (32, 32, 1) feeds a BatchNormalization layer, and two
    softmax Dense heads with 33 and 2 units are attached to its output.
    The intermediate architecture and the compilation are still TODO.
    """
    inputs = Input(shape=(32, 32, 1))
    x = BatchNormalization()(inputs)
    # TODO: Define a model architecture
    head_33 = Dense(33, activation='softmax')(x)
    head_2 = Dense(2, activation='softmax')(x)
    net = Model(inputs=inputs, outputs=[head_33, head_2])
    # TODO: Compile the model
    return net


# Note: this rebinds the name `gray_multi_model` from the factory function to the built instance.
gray_multi_model = gray_multi_model()
hide_code
# Display the model architecture
gray_multi_model.summary()
hide_code
# Create callbacks: checkpoint the best weights and reduce the learning rate on plateau
gray_multi_checkpointer = ModelCheckpoint(
    filepath='weights.best.gray_multi.hdf5', save_best_only=True, verbose=2)
gray_multi_lr_reduction = ReduceLROnPlateau(
    monitor='val_loss', factor=0.8, patience=10, verbose=2)
hide_code
# Train the model on the two-element target lists
gray_multi_history = gray_multi_model.fit(
    x_train4, y_train4_list,
    validation_data=(x_valid4, y_valid4_list),
    batch_size=64, epochs=100, verbose=0,
    callbacks=[gray_multi_checkpointer, gray_multi_lr_reduction])
We should have an accuracy greater than 3% for the first target (letter) and greater than 50% for the second target (background).
hide_code
# Restore the weights saved by the checkpoint callback
gray_multi_model.load_weights(filepath='weights.best.gray_multi.hdf5')
# Evaluate the restored model on the testing set
gray_multi_scores = gray_multi_model.evaluate(x_test4, y_test4_list, verbose=0)
print("Scores: \n", gray_multi_scores)
# NOTE(review): indices 3 and 4 are presumably the per-output accuracies;
# this depends on how the model is compiled — confirm.
print("First label. Accuracy: %.2f%%" % (gray_multi_scores[3] * 100))
print("Second label. Accuracy: %.2f%%" % (gray_multi_scores[4] * 100))
hide_code
# Create bottleneck features
# NOTE(review): scipy.misc.imresize is not available in recent SciPy
# releases — confirm the installed version still provides it.
# Each image is resized to 139x139x3 before InceptionV3 preprocessing.
resize_x_train = np.array(
    [scipy.misc.imresize(img, (139, 139, 3)) for img in x_train]).astype('float32')
resize_x_valid = np.array(
    [scipy.misc.imresize(img, (139, 139, 3)) for img in x_valid]).astype('float32')
resize_x_test = np.array(
    [scipy.misc.imresize(img, (139, 139, 3)) for img in x_test]).astype('float32')

iv3_x_train = preprocess_input(resize_x_train)
iv3_x_valid = preprocess_input(resize_x_valid)
iv3_x_test = preprocess_input(resize_x_test)

# ImageNet-pretrained InceptionV3 without its top classifier, used as a feature extractor
iv3_base_model = InceptionV3(weights='imagenet', include_top=False)
x_train_bn = iv3_base_model.predict(iv3_x_train)
x_valid_bn = iv3_base_model.predict(iv3_x_valid)
x_test_bn = iv3_base_model.predict(iv3_x_test)
hide_code
# Save bottleneck features after dropping their length-one axes
x_train_bn, x_valid_bn, x_test_bn = (
    np.squeeze(features) for features in (x_train_bn, x_valid_bn, x_test_bn))
np.save('x_train_bn.npy', x_train_bn)
np.save('x_valid_bn.npy', x_valid_bn)
np.save('x_test_bn.npy', x_test_bn)
hide_code
# Load bottleneck features
x_train_bn, x_valid_bn, x_test_bn = (
    np.load(path) for path in ('x_train_bn.npy', 'x_valid_bn.npy', 'x_test_bn.npy'))
hide_code
def iv3_model():
    """Return a new Sequential model for the bottleneck features (template: architecture and compilation are still TODO)."""
    net = Sequential()
    # TODO: Define a model architecture
    # TODO: Compile the model
    return net


# Note: this rebinds the name `iv3_model` from the factory function to the built instance.
iv3_model = iv3_model()
hide_code
# Create callbacks: checkpoint the best weights and reduce the learning rate on plateau
iv3_checkpointer = ModelCheckpoint(
    filepath='weights.best.iv3.hdf5', save_best_only=True, verbose=2)
iv3_lr_reduction = ReduceLROnPlateau(
    monitor='val_loss', factor=0.8, patience=5, verbose=2)
hide_code
# Fit the model on the bottleneck features
iv3_history = iv3_model.fit(
    x_train_bn, y_train,
    validation_data=(x_valid_bn, y_valid),
    batch_size=64, epochs=50, verbose=0,
    callbacks=[iv3_checkpointer, iv3_lr_reduction])
hide_code
# Plot the training history
history_plot(iv3_history, 0)
hide_code
# Restore the weights saved by the checkpoint callback
iv3_model.load_weights(filepath='weights.best.iv3.hdf5')
# Evaluate the restored model on the testing set
iv3_scores = iv3_model.evaluate(x=x_test_bn, y=y_test)
print("Accuracy: %.2f%%" % (iv3_scores[1] * 100))
iv3_scores
hide_code
# Create bottleneck features from the grayscale images converted to RGB
# NOTE(review): scipy.misc.imresize is not available in recent SciPy
# releases — confirm the installed version still provides it.
# Each image is resized to 139x139x3 before InceptionV3 preprocessing.
resize_x_train2 = np.array(
    [scipy.misc.imresize(img, (139, 139, 3)) for img in x_train2_color]).astype('float32')
resize_x_valid2 = np.array(
    [scipy.misc.imresize(img, (139, 139, 3)) for img in x_valid2_color]).astype('float32')
resize_x_test2 = np.array(
    [scipy.misc.imresize(img, (139, 139, 3)) for img in x_test2_color]).astype('float32')

iv3_x_train2 = preprocess_input(resize_x_train2)
iv3_x_valid2 = preprocess_input(resize_x_valid2)
iv3_x_test2 = preprocess_input(resize_x_test2)

# ImageNet-pretrained InceptionV3 without its top classifier, used as a feature extractor
iv3_base_model2 = InceptionV3(weights='imagenet', include_top=False)
x_train_bn2 = iv3_base_model2.predict(iv3_x_train2)
x_valid_bn2 = iv3_base_model2.predict(iv3_x_valid2)
x_test_bn2 = iv3_base_model2.predict(iv3_x_test2)
hide_code
# Save bottleneck features after dropping their length-one axes
x_train_bn2, x_valid_bn2, x_test_bn2 = (
    np.squeeze(features) for features in (x_train_bn2, x_valid_bn2, x_test_bn2))
np.save('x_train_bn2.npy', x_train_bn2)
np.save('x_valid_bn2.npy', x_valid_bn2)
np.save('x_test_bn2.npy', x_test_bn2)
hide_code
# Load bottleneck features
x_train_bn2, x_valid_bn2, x_test_bn2 = (
    np.load(path) for path in ('x_train_bn2.npy', 'x_valid_bn2.npy', 'x_test_bn2.npy'))
hide_code
def iv3_gray_model():
    """Return a new Sequential model for the grayscale bottleneck features (template: architecture and compilation are still TODO)."""
    net = Sequential()
    # TODO: Define a model architecture
    # TODO: Compile the model
    return net


# Note: this rebinds the name `iv3_gray_model` from the factory function to the built instance.
iv3_gray_model = iv3_gray_model()
hide_code
# Create callbacks: checkpoint the best weights and reduce the learning rate on plateau
iv3_gray_checkpointer = ModelCheckpoint(
    filepath='weights.best.iv3_gray.hdf5', save_best_only=True, verbose=2)
iv3_gray_lr_reduction = ReduceLROnPlateau(
    monitor='val_loss', factor=0.8, patience=5, verbose=2)
hide_code
# Fit the model on the grayscale bottleneck features
iv3_gray_history = iv3_gray_model.fit(
    x_train_bn2, y_train2,
    validation_data=(x_valid_bn2, y_valid2),
    batch_size=64, epochs=50, verbose=0,
    callbacks=[iv3_gray_checkpointer, iv3_gray_lr_reduction])
hide_code
# Plot the training history
history_plot(iv3_gray_history, 0)
hide_code
# Restore the weights saved by the checkpoint callback
iv3_gray_model.load_weights(filepath='weights.best.iv3_gray.hdf5')
# Evaluate the restored model on the testing set
iv3_gray_scores = iv3_gray_model.evaluate(x=x_test_bn2, y=y_test2)
print("Accuracy: %.2f%%" % (iv3_gray_scores[1] * 100))
iv3_gray_scores
hide_code
# Create a list of symbols: 33 lowercase Cyrillic letters, indexed by zero-based class label
symbols = list('абвгдеёжзийклмнопрстуфхцчшщъыьэюя')
hide_code
# Model predictions for the testing dataset
y_test_predict = model.predict_classes(x_test)
hide_code
# Display 16 random test images titled "predicted (true)";
# the title is blue for a correct prediction and dark red otherwise
fig = plt.figure(figsize=(18, 18))
for i, idx in enumerate(np.random.choice(len(x_test), size=16, replace=False)):
    ax = fig.add_subplot(4, 4, i + 1, xticks=[], yticks=[])
    ax.imshow(np.squeeze(x_test[idx]))
    pred_idx = y_test_predict[idx]
    true_idx = np.argmax(y_test[idx])
    ax.set_title("{} ({})".format(symbols[pred_idx], symbols[true_idx]),
                 color=("darkred" if pred_idx != true_idx else "#4876ff"))
hide_code
# Model predictions for the testing dataset
y_test2_predict = gray_model.predict_classes(x_test2)
hide_code
# Display 16 random grayscale test images titled "predicted (true)";
# the title is blue for a correct prediction and dark red otherwise
fig = plt.figure(figsize=(18, 18))
for i, idx in enumerate(np.random.choice(len(x_test2), size=16, replace=False)):
    ax = fig.add_subplot(4, 4, i + 1, xticks=[], yticks=[])
    ax.imshow(np.squeeze(x_test2[idx]), cmap=cm.bone)
    pred_idx = y_test2_predict[idx]
    true_idx = np.argmax(y_test2[idx])
    ax.set_title("{} ({})".format(symbols[pred_idx], symbols[true_idx]),
                 color=("darkred" if pred_idx != true_idx else "#4876ff"))