Machine Learning Engineer Nanodegree

Deep Learning

📑   P5: Build a Digit Recognition Program. Step 3

In [378]:
import numpy as np
import pandas as pd

import cv2

import scipy as sp
import scipy.ndimage
import scipy.misc
from scipy.special import expit

import random
from time import time

import os
import sys

import h5py
import tarfile

from six.moves.urllib.request import urlretrieve
from six.moves import cPickle as pickle

import tensorflow as tf
import tensorflow.examples.tutorials.mnist as mnist

from skimage.feature import hog
from sklearn.externals import joblib
from sklearn.neural_network import MLPClassifier, BernoulliRBM
from sklearn import linear_model, datasets, metrics
from sklearn.pipeline import Pipeline
from sklearn import manifold, decomposition, ensemble
from sklearn import discriminant_analysis, random_projection
from sklearn.model_selection import train_test_split

import keras as ks
from keras.models import Sequential, load_model, Model
from keras.preprocessing import sequence
from keras.optimizers import SGD, RMSprop
from keras.layers import Dense, Dropout, LSTM
from keras.layers import Activation, Flatten, Input, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers.embeddings import Embedding
from keras.callbacks import ModelCheckpoint

from IPython.display import display, Image, IFrame
import matplotlib.pylab as plt
import as cm
from matplotlib import offsetbox
%matplotlib inline

import warnings
In [3]:
def fivedigit_label(label):
    size = len(label)
    if size >= 5:
        return label
        num_zeros = np.full((5-size), 10)
        return np.array(np.concatenate((num_zeros, label), axis = 0))
def get_filenames(folder):
    image_path = os.path.join(folder)
    return np.array([f for f in os.listdir(image_path) if f.endswith('.png')])

def get_image(folder, image_file):
    filename=os.path.join(folder, image_file)
    image = scipy.ndimage.imread(filename, mode='RGB')
    if folder == 'new':
        n = np.where(new_filenames == image_file)[0]
        label = new_labels[n]
    image64_1 = scipy.misc.imresize(image, (32, 32, 3))/255
    image64_2 =, dtype='float32'), [0.299, 0.587, 0.114])
    return image64_1, image64_2, label

def digit_to_categorical(data):
    n = data.shape[1]
    data_cat = np.empty([len(data), n, 11])
    for i in range(n):
        data_cat[:, i] = ks.utils.to_categorical(data[:, i], num_classes=11)
    return data_cat
In [307]:
def get_image2(folder, image_file, boxes):
    filename=os.path.join(folder, image_file)
    image = scipy.ndimage.imread(filename, mode='RGB')
    box = boxes.loc[image_file]
    image = image[box[0]:box[1], box[2]:box[3]]
    if folder == 'new':
        n = np.where(new_filenames == image_file)[0]
        label = new_labels[n]
    image32_1 = scipy.misc.imresize(image, (32, 32, 3))/255
    image32_2 =, dtype='float32'), [0.299, 0.587, 0.114])
    return image32_1, image32_2, label

def get_image3(folder, image_file, boxes):
    filename=os.path.join(folder, image_file)
    image = cv2.imread(filename)
    box = boxes.loc[image_file]
    image = image[box[0]:box[1], box[2]:box[3]]
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#    image_hist = cv2.equalizeHist(image_gray)
    image_r = cv2.resize(image_gray,(32, 32), interpolation = cv2.INTER_CUBIC)
    if folder == 'new':
        n = np.where(new_filenames == image_file)[0]
        label = new_labels[n]
    return image_r, label

def get_image4(folder, image_file):
    filename = os.path.join(folder, image_file)
    img = cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2RGB)
    img = cv2.GaussianBlur(img, (7, 7), -1)[3:-3, 3:-3]    
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)    
    edges = cv2.Canny(img_gray, 100, 200)
#    img[edges!=0] = (255, 255, 255)
#    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    rectangle = cv2.boundingRect(edges)    
    n1, n2, n3, n4 = rectangle[1], rectangle[1]+rectangle[3], rectangle[0], rectangle[0]+rectangle[2]
    if (n2 == 0): n2 = img.shape[0]
    if (n4 == 0): n4 = img.shape[1]
    image_box = img_gray[n1:n2, n3:n4]
    image_resize = cv2.resize(image_box, (32, 32), interpolation = cv2.INTER_CUBIC)
    return image_resize

Experimental Datasets

Dataset #6. Newly-Captured Images


In [258]:
new_filenames = get_filenames('new')
print('New files list:\n', new_filenames)

new_labels = [[8], [5, 6, 3], [5, 7], [6], [6, 1, 1],
              [8], [5, 9], [1, 0, 1], [1, 0, 0, 0], [1, 9, 1, 3], 
              [4], [1], [3, 4, 4], [2, 4, 4, 8], [1, 5], 
              [7], [7, 8, 2, 2], [6, 4, 8], [2], [3, 0],              
              [3], [4, 3], [2, 0, 1, 0], [7, 8, 3], [1, 0, 1, 1], 
              [7], [1, 0], [2], [9], [8]]
new_labels = np.array([fivedigit_label(new_labels[i]) for i in range(len(new_labels))])
print('New labels: \n',new_labels)
New files list:
 ['1.png' '10.png' '11.png' '12.png' '13.png' '14.png' '15.png' '16.png'
 '17.png' '18.png' '19.png' '2.png' '20.png' '21.png' '22.png' '23.png'
 '24.png' '25.png' '26.png' '27.png' '28.png' '29.png' '3.png' '30.png'
 '4.png' '5.png' '6.png' '7.png' '8.png' '9.png']
New labels: 
 [[10 10 10 10  8]
 [10 10  5  6  3]
 [10 10 10  5  7]
 [10 10 10 10  6]
 [10 10  6  1  1]
 [10 10 10 10  8]
 [10 10 10  5  9]
 [10 10  1  0  1]
 [10  1  0  0  0]
 [10  1  9  1  3]
 [10 10 10 10  4]
 [10 10 10 10  1]
 [10 10  3  4  4]
 [10  2  4  4  8]
 [10 10 10  1  5]
 [10 10 10 10  7]
 [10  7  8  2  2]
 [10 10  6  4  8]
 [10 10 10 10  2]
 [10 10 10  3  0]
 [10 10 10 10  3]
 [10 10 10  4  3]
 [10  2  0  1  0]
 [10 10  7  8  3]
 [10  1  0  1  1]
 [10 10 10 10  7]
 [10 10 10  1  0]
 [10 10 10 10  2]
 [10 10 10 10  9]
 [10 10 10 10  8]]
In [259]:
image_example = get_image('new', '30.png')

print("Image size: ", image_example[0].shape)
print("Image label: ", image_example[2])
print('\nExample of image preprocessing')
plt.imshow(image_example[1], cmap=cm.Blues);
Image size:  (32, 32, 3)
Image label:  [[10 10  7  8  3]]

Example of image preprocessing
In [260]:
new_images1 = np.array([get_image('new', x)[0] for x in new_filenames])
new_images2 = np.array([get_image('new', x)[1] for x in new_filenames])

The '.pickle' file

In [261]:
pickle_file = 'new_digits.pickle'

    f = open(pickle_file, 'wb')
    save = {'new_images1': new_images1, 'new_images2': new_images2,
            'new_labels': new_labels, 'new_filenames': new_filenames}
    pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    print('Unable to save data to', pickle_file, ':', e)
statinfo = os.stat(pickle_file)
print('Compressed pickle size:', statinfo.st_size)
Compressed pickle size: 985369
In [262]:
pickle_file = 'new_digits.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    new_images1 = save['new_images1']
    new_images2 = save['new_images2']
    new_labels = save['new_labels']
    new_filenames = save['new_filenames']
    del save
print('Number of new images: ', len(new_images1))
Number of new images:  30
In [263]:
new_labels_cat = digit_to_categorical(new_labels)
print('The sixth dataset')
print("Shape of the features - {}, shape of the target - {}".\
      format(new_images2.shape, new_labels_cat.shape))
The sixth dataset
Shape of the features - (30, 32, 32), shape of the target - (30, 5, 11)
In [264]:
X_new = new_images2.reshape(30, 32, 32, 1)
y_new_cat_list = [new_labels_cat[:, i] for i in range(5)]
In [265]:
print ('File: ', new_filenames[23])
print ('Label: ', new_labels[23])
print ('Categorical label: \n', new_labels_cat[23])
print('\nExample of loaded images')
File:  30.png
Label:  [10 10  7  8  3]
Categorical label: 
 [[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]]

Example of loaded images

The image boxes

In [267]:
boxes = np.array([[10, 120, 15, 100], [20, 55, 0, 120], [5, 90, 15, 140], [100, 500, 200, 500], [20, 100, 30, 160],
                 [5, 220, 70, 250], [10, 180, 20, 130], [5, 100, 15, 110], [10, 120, 10, 220], [150, 400, 10, 480],
                 [30, 150, 0, 100], [70, 220, 80, 210], [130, 250, 80, 300], [10, 60, 5, 155], [40, 80, 60, 110],
                 [0, 120, 0, 100], [40, 90, 10, 170], [40, 80, 30, 130], [30, 180, 20, 140], [70, 120, 40, 90],
                 [10, 140, 20, 90], [20, 120, 0, 130], [20, 140, 10, 290], [60, 120, 10, 110], [10, 140, 10, 340],
                 [20, 150, 20, 130], [10, 170, 20, 190], [10, 230, 20, 170], [10, 240, 200, 400], [10, 140, 20, 150]])

new_boxes = pd.DataFrame(data=boxes, index = new_filenames)
0 1 2 3
1.png 10 120 15 100
10.png 20 55 0 120
11.png 5 90 15 140
12.png 100 500 200 500
13.png 20 100 30 160
14.png 5 220 70 250
15.png 10 180 20 130
In [268]:
new_images1_2 = np.array([get_image2('new', x, new_boxes)[0] for x in new_filenames])
new_images2_2 = np.array([get_image2('new', x, new_boxes)[1] for x in new_filenames])

X_new2 = new_images2_2.reshape(30, 32, 32, 1)
In [271]:
print ('File: ', new_filenames[1])
print ('Label: ', new_labels[1])
print ('Categorical label: \n', new_labels_cat[1])
print('\nExample of loaded images')
File:  10.png
Label:  [10 10  5  6  3]
Categorical label: 
 [[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]]

Example of loaded images

With image boxes. OpenCV

In [348]:
new_images2_3 = np.array([get_image3('new', x, new_boxes)[0] for x in new_filenames])
X_new3 = new_images2_3.reshape(30, 32, 32, 1)/255

new_images2_4 = np.array([get_image4('new', x) for x in new_filenames])
X_new4 = new_images2_4.reshape(30, 32, 32, 1)/255

Step 3: Test a Model on Newly-Captured Images

Take several pictures of numbers that you find around you (at least five), and run them through your classifier on your computer to produce example results. Alternatively (optionally), you can try using OpenCV / SimpleCV / Pygame to capture live images from a webcam and run those through your classifier.

Load models

In [278]:
cnn_model = load_model('cnn_model.h5')
cnn_model2 = load_model('cnn_model2.h5')

Predictions without boxes

In [279]:
y_new_predict = cnn_model.predict(X_new)

y_predict = []
for i in range(30):
    for j in range(5):
y_predict = np.array(y_predict).reshape(30,5)
print('CNN Model 1. Predictions: ')
CNN Model 1. Predictions: 
[[10 10 10 10  8]
 [10 10 10 10  2]
 [10 10 10  5  7]
 [10 10 10 10  6]
 [10 10  1  4  1]
 [10 10 10 10  6]
 [10 10 10  5  9]
 [10 10  1  0  1]
 [10  1  0  0  0]
 [10  1  9  1  3]
 [10 10 10  4  1]
 [10 10 10 10  1]
 [10 10  2  2  1]
 [10 10 10  4  4]
 [10 10 10 10  4]
 [10 10 10  7  2]
 [10 10 10  1  5]
 [10 10  1  0  4]
 [10 10 10 10  2]
 [10 10 10 10  0]
 [10 10 10 10  3]
 [10 10  4  4  3]
 [10  2  0  0  0]
 [10 10 10  2  5]
 [10  1  0  3  1]
 [10 10 10 10  7]
 [10 10 10  1  0]
 [10 10 10 10  2]
 [10 10 10  9  9]
 [10 10 10 10  8]]
In [280]:
y_new_predict_2 = cnn_model2.predict(X_new)

y_predict_2 = []
for i in range(30):
    for j in range(5):
y_predict_2 = np.array(y_predict_2).reshape(30,5)
print('CNN Model 2. Predictions: ')
CNN Model 2. Predictions: 
[[10 10 10 10  8]
 [10 10 10  2  2]
 [10 10 10  5  7]
 [10 10 10  1  6]
 [10  1  1  5  1]
 [10 10 10  1  3]
 [10 10 10  5  9]
 [10 10  1  0  1]
 [10  1  0  0  0]
 [10  1  9  1  2]
 [10 10 10  4  1]
 [10 10 10 10  0]
 [10 10 10  1  4]
 [10  2 10  4  4]
 [10 10 10 10  1]
 [10 10 10  7  7]
 [10 10 10  4  9]
 [10 10  1  4  4]
 [10 10 10 10  1]
 [10 10  1  5  4]
 [10 10 10 10  3]
 [10 10 10  4  3]
 [10  2  0  1  0]
 [10 10 10 10  5]
 [10  1  0  4  1]
 [10 10 10 10  7]
 [10 10 10  1  0]
 [10 10 10 10  2]
 [10 10 10  1  9]
 [10 10 10 10  3]]
In [281]:
cnn_scores = cnn_model.evaluate(X_new, y_new_cat_list, verbose=0)

print("CNN Model 1. Scores: \n" , (cnn_scores))
print("CNN Model 1. First digit. Accuracy: %.2f%%" % (cnn_scores[6]*100))
print("CNN Model 1. Second digit. Accuracy: %.2f%%" % (cnn_scores[7]*100))
print("CNN Model 1. Third digit. Accuracy: %.2f%%" % (cnn_scores[8]*100))
print("CNN Model 1. Fourth digit. Accuracy: %.2f%%" % (cnn_scores[9]*100))
print("CNN Model 1. Fifth digit. Accuracy: %.2f%%" % (cnn_scores[10]*100))
CNN Model 1. Scores: 
 [4.0245327949523926, 0.0035858158953487873, 0.4717223048210144, 1.1623708009719849, 1.413460373878479, 0.97339349985122681, 1.0, 0.93333333730697632, 0.73333334922790527, 0.56666666269302368, 0.66666668653488159]
CNN Model 1. First digit. Accuracy: 100.00%
CNN Model 1. Second digit. Accuracy: 93.33%
CNN Model 1. Third digit. Accuracy: 73.33%
CNN Model 1. Fourth digit. Accuracy: 56.67%
CNN Model 1. Fifth digit. Accuracy: 66.67%
In [282]:
avg_accuracy = sum([cnn_scores[i] for i in range(6, 11)])/5

print("CNN Model 1. Average Accuracy: %.2f%%" % (avg_accuracy*100))
CNN Model 1. Average Accuracy: 78.00%
In [283]:
cnn_scores_2 = cnn_model2.evaluate(X_new, y_new_cat_list, verbose=0)

print("CNN Model 2. Scores: \n" , (cnn_scores_2))
print("CNN Model 2. First digit. Accuracy: %.2f%%" % (cnn_scores_2[6]*100))
print("CNN Model 2. Second digit. Accuracy: %.2f%%" % (cnn_scores_2[7]*100))
print("CNN Model 2. Third digit. Accuracy: %.2f%%" % (cnn_scores_2[8]*100))
print("CNN Model 2. Fourth digit. Accuracy: %.2f%%" % (cnn_scores_2[9]*100))
print("CNN Model 2. Fifth digit. Accuracy: %.2f%%" % (cnn_scores_2[10]*100))
CNN Model 2. Scores: 
 [4.6979498863220215, 0.0025169970467686653, 0.36192399263381958, 1.4037339687347412, 1.7670741081237793, 1.162700891494751, 1.0, 0.93333333730697632, 0.73333334922790527, 0.56666666269302368, 0.56666666269302368]
CNN Model 2. First digit. Accuracy: 100.00%
CNN Model 2. Second digit. Accuracy: 93.33%
CNN Model 2. Third digit. Accuracy: 73.33%
CNN Model 2. Fourth digit. Accuracy: 56.67%
CNN Model 2. Fifth digit. Accuracy: 56.67%
In [284]:
avg_accuracy_2 = sum([cnn_scores_2[i] for i in range(6, 11)])/5

print("CNN Model 2. Average Accuracy: %.2f%%" % (avg_accuracy_2*100))
CNN Model 2. Average Accuracy: 76.00%

Questions and Answers

Question 7

Choose five candidate images of numbers you took from around you and provide them in the report. Are there any particular qualities of the image(s) that might make classification difficult?

Answer 7

"10.png" "11.png" "27.png" "13.png" "19.png"

Here are five candidate images. It's not so easy to recognize digits for some reasons: additional symbols ("-", border lines, strokes, circles, etc.), too stylish fonts, lines and cells as a background.

Question 8

Is your model able to perform equally well on captured pictures or a live camera stream when compared to testing on the realistic dataset?

Answer 8

Accuracy indicators of predictions are worse than for the testing data set of street photos. They have decreased till 76-78%.

Optional: Question 9

If necessary, provide documentation for how an interface was built for your model to load and classify newly-acquired images.

Answer 9

I took the simplest procedures that can be used to process the images and to locate the digits in the photos. They are built with programming libraries 'SciPy' and 'OpenCV'. They do not require special skills in application and documentation.

And the model was also built with a very well-known library 'Keras'. By ease of use and by endless possibilities in creating models, this resource is similar to the set "Lego" for constructing. I do not think that in this case, any special documentation is needed.

Step 4: Explore an Improvement for a Model

There are many things you can do once you have the basic classifier in place. One example would be to also localize where the numbers are on the image. The SVHN dataset provides bounding boxes that you can tune to train a localizer. Train a regression loss to the coordinates of the bounding box, and then test it.

Predictions with boxes

In [285]:
y_new_predict2 = cnn_model.predict(X_new2)

y_predict2 = []
for i in range(30):
    for j in range(5):
y_predict2 = np.array(y_predict2).reshape(30,5)
print('CNN Model 1. Predictions: ')
CNN Model 1. Predictions: 
[[10 10 10 10  8]
 [10 10  3  5  3]
 [10 10 10  5  7]
 [10 10 10 10  6]
 [10 10  6  4  0]
 [10 10 10 10  8]
 [10 10 10  5  9]
 [10 10  1  0  1]
 [10  1  0  0  0]
 [10  1  9  1  3]
 [10 10 10  4  1]
 [10 10 10 10  1]
 [10 10  3  4  4]
 [10 10 10  4  4]
 [10 10 10  1  5]
 [10 10 10  7  2]
 [10  2  3  0  2]
 [10  1  6  6  8]
 [10 10 10 10  2]
 [10 10 10  3  0]
 [10 10 10  1  3]
 [10 10  4  4  3]
 [10  2  0  0  0]
 [10 10  7  8  3]
 [10  1  0  5  0]
 [10 10 10 10  7]
 [10 10 10  1  0]
 [10 10 10 10  2]
 [10 10 10 10  9]
 [10 10 10 10  8]]
In [286]:
cnn_scores2 = cnn_model.evaluate(X_new2, y_new_cat_list, verbose=0)

print("CNN Model 1. Scores: \n" , (cnn_scores2))
print("CNN Model 1. First digit. Accuracy: %.2f%%" % (cnn_scores2[6]*100))
print("CNN Model 1. Second digit. Accuracy: %.2f%%" % (cnn_scores2[7]*100))
print("CNN Model 1. Third digit. Accuracy: %.2f%%" % (cnn_scores2[8]*100))
print("CNN Model 1. Fourth digit. Accuracy: %.2f%%" % (cnn_scores2[9]*100))
print("CNN Model 1. Fifth digit. Accuracy: %.2f%%" % (cnn_scores2[10]*100))
CNN Model 1. Scores: 
 [1.7664835453033447, 0.00093853432917967439, 0.27404701709747314, 0.29167380928993225, 0.75607544183731079, 0.44374877214431763, 1.0, 0.89999997615814209, 0.86666667461395264, 0.69999998807907104, 0.83333331346511841]
CNN Model 1. First digit. Accuracy: 100.00%
CNN Model 1. Second digit. Accuracy: 90.00%
CNN Model 1. Third digit. Accuracy: 86.67%
CNN Model 1. Fourth digit. Accuracy: 70.00%
CNN Model 1. Fifth digit. Accuracy: 83.33%
In [287]:
avg_accuracy2 = sum([cnn_scores2[i] for i in range(6, 11)])/5

print("CNN Model 1. Average Accuracy: %.2f%%" % (avg_accuracy2*100))
CNN Model 1. Average Accuracy: 86.00%
In [288]:
y_new_predict2_2 = cnn_model2.predict(X_new2)

y_predict2_2 = []
for i in range(30):
    for j in range(5):
y_predict2_2 = np.array(y_predict2_2).reshape(30,5)
print('CNN Model 2. Predictions: ')
CNN Model 2. Predictions: 
[[10 10 10 10  8]
 [10 10  3  8  5]
 [10 10 10  5  7]
 [10 10 10 10  6]
 [10 10 10  5  1]
 [10 10 10 10  8]
 [10 10 10  5  9]
 [10 10  1  0  1]
 [10  1  0  0  0]
 [10  1  9  1  3]
 [10 10 10  2  1]
 [10 10 10 10  1]
 [10 10  3  4  4]
 [10  2 10  4  4]
 [10 10 10  4  5]
 [10 10 10 10  7]
 [10  2  7  1  7]
 [10  1  4  4  8]
 [10 10 10 10  2]
 [10 10 10  3  0]
 [10 10 10 10  3]
 [10 10 10  4  3]
 [10  2  0  1  0]
 [10 10  7  8  3]
 [10  1  0  9  1]
 [10 10 10 10  7]
 [10 10 10  1  0]
 [10 10 10 10  2]
 [10 10 10 10  9]
 [10 10 10 10  8]]
In [289]:
cnn_scores2_2 = cnn_model2.evaluate(X_new2, y_new_cat_list, verbose=0)

print("CNN Model 2. Scores: \n" , (cnn_scores2_2))
print("CNN Model 2. First digit. Accuracy: %.2f%%" % (cnn_scores2_2[6]*100))
print("CNN Model 2. Second digit. Accuracy: %.2f%%" % (cnn_scores2_2[7]*100))
print("CNN Model 2. Third digit. Accuracy: %.2f%%" % (cnn_scores2_2[8]*100))
print("CNN Model 2. Fourth digit. Accuracy: %.2f%%" % (cnn_scores2_2[9]*100))
print("CNN Model 2. Fifth digit. Accuracy: %.2f%%" % (cnn_scores2_2[10]*100))
CNN Model 2. Scores: 
 [1.7370322942733765, 0.00017315873992629349, 0.21628722548484802, 0.42631766200065613, 0.59386265277862549, 0.50039160251617432, 1.0, 0.93333333730697632, 0.83333331346511841, 0.80000001192092896, 0.86666667461395264]
CNN Model 2. First digit. Accuracy: 100.00%
CNN Model 2. Second digit. Accuracy: 93.33%
CNN Model 2. Third digit. Accuracy: 83.33%
CNN Model 2. Fourth digit. Accuracy: 80.00%
CNN Model 2. Fifth digit. Accuracy: 86.67%
In [290]:
avg_accuracy2_2 = sum([cnn_scores2_2[i] for i in range(6, 11)])/5

print("CNN Model 2. Average Accuracy: %.2f%%" % (avg_accuracy2_2*100))
CNN Model 2. Average Accuracy: 88.67%

Individual predictions for images. OpenCV

In [300]:
image_gray = get_image3('new', '10.png', new_boxes)[0]
print ('File: ', new_filenames[1])
print ('Label: ', new_labels[1])
print ('Categorical label: \n', new_labels_cat[1])
File:  10.png
Label:  [10 10  5  6  3]
Categorical label: 
 [[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]]
In [311]:
image_for_predict = image_gray.reshape(1, 32, 32, 1)/255
y_image_predict = cnn_model.predict(image_for_predict)
y_predict_example = [np.argmax(y) for y in y_image_predict]

print('Predicted label:')
Predicted label:
[10, 10, 3, 6, 3]
In [336]:
image_gray_2 = get_image4('new', '30.png')
print ('File: ', new_filenames[23])
print ('Label: ', new_labels[23])
print ('Categorical label: \n', new_labels_cat[23])
File:  30.png
Label:  [10 10  7  8  3]
Categorical label: 
 [[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]]
In [337]:
image_for_predict_2 = image_gray_2.reshape(1, 32, 32, 1)/255
y_image_predict_2 = cnn_model2.predict(image_for_predict_2)
y_predict_example_2 = [np.argmax(y) for y in y_image_predict_2]

print('Predicted label:')
Predicted label:
[10, 10, 7, 8, 3]

Predictions with boxes. OpenCV

In [349]:
image_gray2 = new_images2_4[1]
print ('File: ', new_filenames[1])
print ('Label: ', new_labels[1])
print ('Categorical label: \n', new_labels_cat[1])
File:  10.png
Label:  [10 10  5  6  3]
Categorical label: 
 [[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]]
In [350]:

image_gray2_2 = new_images2_4[23]
print ('File: ', new_filenames[23])
print ('Label: ', new_labels[23])
print ('Categorical label: \n', new_labels_cat[23])
File:  30.png
Label:  [10 10  7  8  3]
Categorical label: 
 [[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]]
In [351]:
y_new_predict4 = cnn_model.predict(X_new4)
y_predict4 = []
for i in range(30):
    for j in range(5):
y_predict4 = np.array(y_predict4).reshape(30,5)
print('CNN Model 1. Predictions: ')
CNN Model 1. Predictions: 
[[10 10 10 10  9]
 [10  2  3  3  5]
 [10 10 10  5  7]
 [10 10 10 10  6]
 [10 10  4  4  0]
 [10 10 10 10  2]
 [10 10  5  4  9]
 [10 10  1  0  1]
 [10  1  0  0  0]
 [10 10 10 10  1]
 [10 10 10  4  1]
 [10 10 10 10  1]
 [10 10  3  4  4]
 [10 10 10  2  4]
 [10 10 10  1  5]
 [10 10 10  7  7]
 [10 10 10  4  1]
 [10  2  4  4  8]
 [10 10 10 10  4]
 [10 10 10  3  0]
 [10 10 10 10  3]
 [10 10  4  0  3]
 [10  2  0  1  0]
 [10 10  7  8  3]
 [10  1  0  3  1]
 [10 10 10 10  7]
 [10 10 10  1  0]
 [10 10 10 10  2]
 [10 10 10  5  9]
 [10 10 10 10  8]]
In [352]:
cnn_scores4 = cnn_model.evaluate(X_new4, y_new_cat_list, verbose=0)

print("CNN Model 1. \n")
print("Scores: \n" , (cnn_scores4))
print("First digit. Accuracy: %.2f%%" % (cnn_scores4[6]*100))
print("Second digit. Accuracy: %.2f%%" % (cnn_scores4[7]*100))
print("Third digit. Accuracy: %.2f%%" % (cnn_scores4[8]*100))
print("Fourth digit. Accuracy: %.2f%%" % (cnn_scores4[9]*100))
print("Fifth digit. Accuracy: %.2f%%" % (cnn_scores4[10]*100))
CNN Model 1. 

 [7.1819477081298828, 0.00027177872834727168, 1.3892989158630371, 1.9871330261230469, 2.1887195110321045, 1.6165248155593872, 1.0, 0.83333331346511841, 0.73333334922790527, 0.63333332538604736, 0.69999998807907104]
First digit. Accuracy: 100.00%
Second digit. Accuracy: 83.33%
Third digit. Accuracy: 73.33%
Fourth digit. Accuracy: 63.33%
Fifth digit. Accuracy: 70.00%
In [353]:
avg_accuracy4 = sum([cnn_scores4[i] for i in range(6, 11)])/5

print("CNN Model 1. Average Accuracy: %.2f%%" % (avg_accuracy4*100))
CNN Model 1. Average Accuracy: 78.00%
In [354]:
y_new_predict4_2 = cnn_model2.predict(X_new4)
y_predict4_2 = []
for i in range(30):
    for j in range(5):
y_predict4_2 = np.array(y_predict4_2).reshape(30,5)
print('CNN Model 2. Predictions: ')
CNN Model 2. Predictions: 
[[10 10 10 10  0]
 [10 10  3  3  5]
 [10 10 10  5  5]
 [10 10 10  1  6]
 [10 10 10  4  1]
 [10 10 10 10  8]
 [10 10 10  6  9]
 [10 10  1  0  1]
 [10  1  0  0  0]
 [10 10 10 10  1]
 [10 10 10  2  1]
 [10 10 10 10  0]
 [10 10  3  4  4]
 [10 10 10  2  4]
 [10 10 10  1  5]
 [10 10 10  7  7]
 [10  1  1  7  4]
 [10  1  4  5  8]
 [10 10 10  1  4]
 [10 10 10  3  0]
 [10 10 10 10  3]
 [10 10 10  4  3]
 [10  2  0  1  0]
 [10 10  7  8  3]
 [10  1  1  5  1]
 [10 10 10 10  7]
 [10 10 10  1  0]
 [10 10 10 10  2]
 [10 10 10  7  9]
 [10 10 10  4  8]]
In [355]:
cnn_scores4_2 = cnn_model2.evaluate(X_new4, y_new_cat_list, verbose=0)

print("CNN Model 2. \n")
print("CNN Scores: \n" , (cnn_scores4_2))
print("First digit. Accuracy: %.2f%%" % (cnn_scores4_2[6]*100))
print("Second digit. Accuracy: %.2f%%" % (cnn_scores4_2[7]*100))
print("Third digit. Accuracy: %.2f%%" % (cnn_scores4_2[8]*100))
print("Fourth digit. Accuracy: %.2f%%" % (cnn_scores4_2[9]*100))
print("Fifth digit. Accuracy: %.2f%%" % (cnn_scores4_2[10]*100))
CNN Model 2. 

CNN Scores: 
 [6.5221505165100098, 0.0010312354424968362, 1.1571204662322998, 1.6763014793395996, 2.0833380222320557, 1.6043590307235718, 1.0, 0.86666667461395264, 0.76666665077209473, 0.53333336114883423, 0.69999998807907104]
First digit. Accuracy: 100.00%
Second digit. Accuracy: 86.67%
Third digit. Accuracy: 76.67%
Fourth digit. Accuracy: 53.33%
Fifth digit. Accuracy: 70.00%
In [356]:
avg_accuracy4_2 = sum([cnn_scores4_2[i] for i in range(6, 11)])/5

print("CNN Model 2. Average Accuracy: %.2f%%" % (avg_accuracy4_2*100))
CNN Model 2. Average Accuracy: 77.33%

Question 10

How well does your model localize numbers on the testing set from the realistic dataset? Do your classification results change at all with localization included?

Answer 10

On the testing realistic set of localized digits, the model works well enough and on average shows the accuracy of predictions about 95 percents.

I didn't train and test the model on the sets without localization. On my opinion, images include a lot of useless information for digit recognition without localization. This creates unnecessary interference and difficulties for the neural network.

Question 11

Test the localization function on the images you captured in Step 3. Does the model accurately calculate a bounding box for the numbers in the images you found? If you did not use a graphical interface, you may need to investigate the bounding boxes by hand. Provide an example of the localization created on a captured image.

Answer 11

The model predicts very well for the newly-captured images with bounding boxes constructed by hand but shows about the same accuracy for the programming procedure for finding digits in the photos and for images without bounding boxes at all. It means that I need to improve only the part for cutting image spaces without digits.

I have illustrated this fact by two examples of files.

Step 5: Build an Application or Program for a Model

Take your project one step further. If you're interested, look to build an Android application or even a more robust Python program that can interface with input images and display the classified numbers and even the bounding boxes. You can for example try to build an augmented reality app by overlaying your answer on the image like the Word Lens app does.

Loading a TensorFlow model into a camera app on Android is demonstrated in the TensorFlow Android demo app, which you can simply modify.

If you decide to explore this optional route, be sure to document your interface and implementation, along with significant results you find. You can see the additional rubric items that you could be evaluated on by following this link.

In [383]:
def digit_to_categorical(data):
    n = data.shape[1]
    data_cat = np.empty([len(data), n, 11])
    for i in range(n):
        data_cat[:, i] = ks.utils.to_categorical(data[:, i], num_classes=11)
    return data_cat

train_images = pd.read_csv("train_images1.csv")
train_images = np.array(train_images.drop('filename', axis=1))
train_images = train_images.reshape(-1, 32, 32, 3)

train_labels = pd.read_csv("train_labels.csv")
train_labels = np.array(train_labels[["0", "1", "2", "3", "4"]])
train_labels_cat = digit_to_categorical(train_labels)
train_labels_cat_list = [train_labels_cat[:, i] for i in range(5)]

test_images = pd.read_csv("test_images1.csv")
test_images = np.array(test_images.drop('filename', axis=1)) 
test_images = test_images.reshape(-1, 32, 32, 3)

test_labels = pd.read_csv("test_labels.csv")
test_labels = np.array(test_labels[["0", "1", "2", "3", "4"]])
test_labels_cat = digit_to_categorical(test_labels)
test_labels_cat_list = [test_labels_cat[:, i] for i in range(5)]
In [384]:
train_images.shape, test_images.shape, train_labels_cat.shape, test_labels_cat.shape
((13000, 32, 32, 3), (6000, 32, 32, 3), (13000, 5, 11), (6000, 5, 11))
In [364]:
from keras.applications.vgg16 import VGG16

VGG16_model = VGG16(weights='imagenet', include_top=False)
bn_train = VGG16_model.predict(train_images)
bn_test = VGG16_model.predict(test_images)
In [367]:
In [369]:
bn_train = np.load('bn_train.npy')
bn_test = np.load('bn_test.npy')
bn_train = bn_train.reshape(-1, 1, 1, bn_train.shape[1])
bn_test = bn_test.reshape(-1, 1, 1, bn_test.shape[1])
In [455]:
def vgg16_add_model():
    model_input = Input(shape=(bn_train.shape[1:]))
    x = GlobalAveragePooling2D()(model_input)

    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    y1 = Dense(11, activation='softmax')(x)
    y2 = Dense(11, activation='softmax')(x)
    y3 = Dense(11, activation='softmax')(x)
    y4 = Dense(11, activation='softmax')(x)
    y5 = Dense(11, activation='softmax')(x) 
    model = Model(input=model_input, output=[y1, y2, y3, y4, y5])

    model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])
    return model
In [456]:
vgg16_add_model = vgg16_add_model()
vgg16_checkpointer = ModelCheckpoint(filepath='', 
                                     verbose=2, save_best_only=True)
In [457]:
vgg16_add_history =, train_labels_cat_list, 
                                        validation_data=(bn_test, test_labels_cat_list),
                                        epochs=50, batch_size=128, 
                                        callbacks=[vgg16_checkpointer], verbose=0);
Epoch 00000: val_loss improved from inf to 4.21974, saving model to
Epoch 00001: val_loss improved from 4.21974 to 4.06842, saving model to
Epoch 00002: val_loss improved from 4.06842 to 3.91739, saving model to
Epoch 00003: val_loss improved from 3.91739 to 3.85889, saving model to
Epoch 00004: val_loss did not improve
Epoch 00005: val_loss improved from 3.85889 to 3.80666, saving model to
Epoch 00006: val_loss improved from 3.80666 to 3.76943, saving model to
Epoch 00007: val_loss did not improve
Epoch 00008: val_loss improved from 3.76943 to 3.69798, saving model to
Epoch 00009: val_loss improved from 3.69798 to 3.67708, saving model to
Epoch 00010: val_loss improved from 3.67708 to 3.65413, saving model to
Epoch 00011: val_loss did not improve
Epoch 00012: val_loss improved from 3.65413 to 3.65114, saving model to
Epoch 00013: val_loss improved from 3.65114 to 3.62319, saving model to
Epoch 00014: val_loss did not improve
Epoch 00015: val_loss did not improve
Epoch 00016: val_loss did not improve
Epoch 00017: val_loss did not improve
Epoch 00018: val_loss improved from 3.62319 to 3.60385, saving model to
Epoch 00019: val_loss did not improve
Epoch 00020: val_loss improved from 3.60385 to 3.58485, saving model to
Epoch 00021: val_loss did not improve
Epoch 00022: val_loss did not improve
Epoch 00023: val_loss did not improve
Epoch 00024: val_loss did not improve
Epoch 00025: val_loss did not improve
Epoch 00026: val_loss did not improve
Epoch 00027: val_loss did not improve
Epoch 00028: val_loss did not improve
Epoch 00029: val_loss did not improve
Epoch 00030: val_loss did not improve
Epoch 00031: val_loss did not improve
Epoch 00032: val_loss did not improve
Epoch 00033: val_loss did not improve
Epoch 00034: val_loss did not improve
Epoch 00035: val_loss did not improve
Epoch 00036: val_loss did not improve
Epoch 00037: val_loss did not improve
Epoch 00038: val_loss did not improve
Epoch 00039: val_loss did not improve
Epoch 00040: val_loss did not improve
Epoch 00041: val_loss did not improve
Epoch 00042: val_loss did not improve
Epoch 00043: val_loss did not improve
Epoch 00044: val_loss did not improve
Epoch 00045: val_loss did not improve
Epoch 00046: val_loss did not improve
Epoch 00047: val_loss did not improve
Epoch 00048: val_loss did not improve
Epoch 00049: val_loss did not improve
In [458]:
vgg16_add_scores = vgg16_add_model.evaluate(bn_test, test_labels_cat_list, verbose=0)

print("VGG16 ADD Model. \n")
print("Scores: \n" , (vgg16_add_scores))
print("First digit. Accuracy: %.2f%%" % (vgg16_add_scores[6]*100))
print("Second digit. Accuracy: %.2f%%" % (vgg16_add_scores[7]*100))
print("Third digit. Accuracy: %.2f%%" % (vgg16_add_scores[8]*100))
print("Fourth digit. Accuracy: %.2f%%" % (vgg16_add_scores[9]*100))
print("Fifth digit. Accuracy: %.2f%%" % (vgg16_add_scores[10]*100))
VGG16 ADD Model. 

 [3.5848494021097821, 0.002440998214505271, 0.046322134738788011, 0.42428658004601799, 1.4356047627131143, 1.6761949189503988, 0.99983333333333335, 0.98683333333333334, 0.8763333333333333, 0.52033333333333331, 0.42566666666666669]
First digit. Accuracy: 99.98%
Second digit. Accuracy: 98.68%
Third digit. Accuracy: 87.63%
Fourth digit. Accuracy: 52.03%
Fifth digit. Accuracy: 42.57%
In [461]:
vgg16_avg_accuracy = sum([vgg16_add_scores[i] for i in range(6, 11)])/5

print("VGG16 Model. Average Accuracy: %.2f%%" % (vgg16_avg_accuracy*100))
VGG16 Model. Average Accuracy: 76.18%
In [459]:
filename = os.path.join('new', '6.png')
img = cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2RGB)
img_vgg16 = cv2.resize(img, (32, 32))

print ('File: ', new_filenames[26])
print ('Label: ', new_labels[26])
print ('Categorical label: \n', new_labels_cat[26])
File:  6.png
Label:  [10 10 10  1  0]
Categorical label: 
 [[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]]
In [460]:
img_vgg16 = img_vgg16.reshape(1,32,32,3)
predict_vgg16 = VGG16_model.predict(img_vgg16)
predict_vgg16_add = vgg16_add_model.predict(predict_vgg16)
predict_label_vgg16 = [np.argmax(y) for y in predict_vgg16_add]

print('Predicted label:')
Predicted label:
[10, 10, 10, 8, 0]


Provide additional documentation sufficient for detailing the implementation of the Android application or Python program for visualizing the classification of numbers in images. It should be clear how the program or application works. Demonstrations should be provided.

In [ ]: