In this notebook, we use transfer learning to train a CNN to classify dog breeds.
Before running the code cell below, download the dataset of dog images here and place it in the repository.
%%html
<style>
@import url('https://fonts.googleapis.com/css?family=Orbitron|Roboto');
body {background-color: #add8e6;}
a {color: darkblue; font-family: 'Roboto';}
h1 {color: steelblue; font-family: 'Orbitron'; text-shadow: 4px 4px 4px #aaa;}
h2, h3 {color: #483d8b; font-family: 'Orbitron'; text-shadow: 4px 4px 4px #aaa;}
h4 {color: slategray; font-family: 'Roboto';}
span {text-shadow: 4px 4px 4px #ccc;}
div.output_prompt, div.output_area pre {color: #483d8b;}
div.input_prompt, div.output_subarea {color: darkblue;}
div.output_stderr pre {background-color: #add8e6;}
div.output_stderr {background-color: #483d8b;}
</style>
from sklearn.datasets import load_files
from keras.utils import np_utils
import numpy as np
from glob import glob
# define function to load train, test, and validation datasets
def load_dataset(path):
    data = load_files(path)
    dog_files = np.array(data['filenames'])
    dog_targets = np_utils.to_categorical(np.array(data['target']), 133)
    return dog_files, dog_targets
# load train, test, and validation datasets
path = '/Users/olgabelitskaya/projects/nd009/Machine_Learning_Engineer_ND_P10/'
train_files, train_targets = load_dataset(path+'dogImages/train')
valid_files, valid_targets = load_dataset(path+'dogImages/valid')
test_files, test_targets = load_dataset(path+'dogImages/test')
# load ordered list of dog names
dog_names = [item.split('/')[-2] for item in sorted(glob(path+'dogImages/train/*/'))]
# print statistics about the dataset
print('There are %d total dog categories.' % len(dog_names))
print('There are %s total dog images.\n' % str(len(train_files) + len(valid_files) + len(test_files)))
print('There are %d training dog images.' % len(train_files))
print('There are %d validation dog images.' % len(valid_files))
print('There are %d test dog images.'% len(test_files))
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
def visualize_img(img_path, ax):
    img = cv2.imread(img_path)
    ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
fig = plt.figure(figsize=(20, 10))
for i in range(12):
    ax = fig.add_subplot(3, 4, i + 1, xticks=[], yticks=[])
    visualize_img(train_files[i], ax)
bottleneck_features = np.load(path+'bottleneck_features/DogVGG16Data.npz')
train_vgg16 = bottleneck_features['train']
valid_vgg16 = bottleneck_features['valid']
test_vgg16 = bottleneck_features['test']
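The DogVGG16Data.npz archive holds bottleneck features that were precomputed by pushing each image through VGG16 with its fully connected top removed. For reference, here is a minimal sketch of how such features could be computed with keras.applications; the helper name extract_vgg16_features is illustrative and this is not necessarily the exact script used to build the archive.
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
# convolutional base only; include_top=False drops the fully connected layers
vgg16_base = VGG16(weights='imagenet', include_top=False)
def extract_vgg16_features(img_path):
    # load and resize the image to the 224x224 input expected by VGG16
    img = image.load_img(img_path, target_size=(224, 224))
    x = np.expand_dims(image.img_to_array(img), axis=0)
    # apply the VGG16-specific preprocessing (mean subtraction, channel ordering)
    x = preprocess_input(x)
    # returns an array of shape (1, 7, 7, 512)
    return vgg16_base.predict(x)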
from keras.layers import Dense, Flatten, Dropout
from keras.models import Sequential
# baseline classifier: flatten the VGG16 bottleneck features and map directly to the 133 breeds
model = Sequential()
model.add(Flatten(input_shape=(7, 7, 512)))
model.add(Dense(133, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='nadam',
              metrics=['accuracy'])
model.summary()
from keras.layers import GlobalAveragePooling2D
# alternative classifier: global average pooling followed by two dense layers with dropout
model = Sequential()
model.add(GlobalAveragePooling2D(input_shape=(7, 7, 512)))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(133, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer='nadam',
              metrics=['accuracy'])
from keras.callbacks import ModelCheckpoint
# train the model
checkpointer = ModelCheckpoint(filepath='dogvgg16.weights.best.hdf5', verbose=1,
                               save_best_only=True)
model.fit(train_vgg16, train_targets, epochs=20, validation_data=(valid_vgg16, valid_targets),
          callbacks=[checkpointer], verbose=2, shuffle=True);
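Capturing the History object returned by model.fit makes the learning curves easy to inspect. A minimal sketch, assuming the cell above is rerun as history = model.fit(...); the accuracy key is 'acc' in this Keras version ('accuracy' in newer releases), so the key is looked up defensively.
# assumes the cell above was run as: history = model.fit(...)
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.figure(figsize=(10, 4))
plt.plot(history.history[acc_key], label='train accuracy')
plt.plot(history.history['val_' + acc_key], label='validation accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()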
# load the weights that yielded the best validation loss
model.load_weights('dogvgg16.weights.best.hdf5')
# get index of predicted dog breed for each image in test set
vgg16_predictions = [np.argmax(model.predict(np.expand_dims(feature, axis=0)))
                     for feature in test_vgg16]
# report test accuracy
test_accuracy = 100 * np.sum(np.array(vgg16_predictions) ==
                             np.argmax(test_targets, axis=1)) / len(vgg16_predictions)
print('\nTest accuracy: %.4f%%' % test_accuracy)
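As a usage example, the trained head can be chained with the feature extractor sketched earlier into a single breed predictor. This is a minimal sketch: predict_breed is an illustrative name, not part of the original notebook, and it relies on the hypothetical extract_vgg16_features helper defined above.
def predict_breed(img_path):
    # bottleneck features for a single image, shape (1, 7, 7, 512)
    bottleneck = extract_vgg16_features(img_path)
    # index of the most probable class maps into the ordered dog_names list
    return dog_names[np.argmax(model.predict(bottleneck))]
# example: predict the breed of the first test image
print(predict_breed(test_files[0]))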