Machine Learning Engineer Nanodegree

Supervised Learning; Deep Learning

📑   Practice Project 1: Neural Networks Mini Projects

In [1]:
In [42]:
import numpy as np
import pandas as pd

from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Activation

import tensorflow as tf
In [3]:
# In this exercise, you will add in code that decides 
# whether a perceptron will fire based on the threshold. 

class Perceptron:
    This class models an artificial neuron with step activation function.
    def __init__(self, weights = np.array([1]), threshold = 0):
        Initialize weights and threshold based on input arguments. Note that no
        type-checking is being performed here for simplicity.
        self.weights = weights
        self.threshold = threshold
    def activate(self,inputs):
        Takes in @param inputs, a list of numbers equal to length of weights.
        @return the output of a threshold perceptron with given inputs based on
        perceptron weights and threshold.

        # The strength with which the perceptron fires.
        strength =, inputs)
        # TODO: return 0 or 1 based on the threshold
        if strength <= self.threshold :
            self.result = 0 # TODO
            self.result = 1 # TODO    
        return self.result

def test():
    A few tests to make sure that the perceptron class performs as expected.
    Nothing should show up in the output if all the assertions pass.
    p1 = Perceptron(np.array([1, 2]), 0.)
    assert p1.activate(np.array([ 1,-1])) == 0 # < threshold --> 0
    assert p1.activate(np.array([-1, 1])) == 1 # > threshold --> 1
    assert p1.activate(np.array([ 2,-1])) == 0 # on threshold --> 0

if __name__ == "__main__":

Threshold Meditation

The main advantage of having a threshold be set to a perceptron is being able to control when a perceptron should fire and when it shouldn't. This gives us control on the sensitivity of our neurons thereby helping us influence the desired output.

Question 1

What do you think the advantage of a perceptron is, compared with simply returning the dot product without a threshold?

Answer 1

Combining dot products with thresholds makes possible classification (more or less than the certain value) and regression (how close targets to the needed points).

Quiz: Perceptron Update Rule

In [3]:
# ----------
# In this exercise, you will update the perceptron class so that it can update
# its weights.
# Finish writing the update() method so that it updates the weights according
# to the perceptron update rule. Updates should be performed online, revising
# the weights after each data point.
# ----------

import numpy as np

class Perceptron:
    This class models an artificial neuron with step activation function.
    def __init__(self, weights = np.array([1]), threshold = 0):
        Initialize weights and threshold based on input arguments. Note that no
        type-checking is being performed here for simplicity.
        self.weights = weights.astype(float) 
        self.threshold = threshold

    def activate(self, values):
        Takes in @param values, a list of numbers equal to length of weights.
        @return the output of a threshold perceptron with given inputs based on
        perceptron weights and threshold.
        # First calculate the strength with which the perceptron fires
        strength =,self.weights)
        # Then return 0 or 1 depending on strength compared to threshold  
        return int(strength > self.threshold)

    def update(self, values, train, eta=.1):
        Takes in a 2D array @param values consisting of a LIST of inputs and a
        1D array @param train, consisting of a corresponding list of expected
        outputs. Updates internal weights according to the perceptron training
        rule using these values and an optional learning rate, @param eta.

        # For each data point:
        for data_point in range(len(values)):
            # TODO: Obtain the neuron's prediction for the data_point --> values[data_point]
            prediction = self.activate(values[data_point]) # TODO
            # Get the prediction accuracy calculated as (expected value - predicted value)
            # expected value = train[data_point], predicted value = prediction
            error = train[data_point] - prediction
            # TODO: update self.weights based on the multiplication of:
            # - prediction accuracy(error)
            # - learning rate(eta)
            # - input value(values[data_point])
            weight_update = error*values[data_point]*eta# TODO
            self.weights += weight_update

def test():
    A few tests to make sure that the perceptron class performs as expected.
    Nothing should show up in the output if all the assertions pass.
    def sum_almost_equal(array1, array2, tol = 1e-6):
        return sum(abs(array1 - array2)) < tol

    p1 = Perceptron(np.array([1,1,1]),0)
    p1.update(np.array([[2,0,-3]]), np.array([1]))
    assert sum_almost_equal(p1.weights, np.array([1.2, 1, 0.7]))

    p2 = Perceptron(np.array([1,2,3]),0)
    assert sum_almost_equal(p2.weights, np.array([0.7, 1.8, 2.9]))

    p3 = Perceptron(np.array([3,0,2]),0)
    assert sum_almost_equal(p3.weights, np.array([2.7, -0.3, 1.7]))

if __name__ == "__main__":
In [4]:
In [52]:
# Quiz: Layered Network Example
In [2]:
# Quiz: Linear Representational Power
3+2*(-1)+(-1)*3, 2+2*4+(-1)*(-5)
(-2, 15)

Deep Neural Networks

In [4]:
# TODO: Set weight1, weight2, and bias
weight1 = 0.5
weight2 = 0.5
bias = -1.0

# Inputs and outputs
test_inputs = [(0, 0), (0, 1), (1, 0), (1, 1)]
correct_outputs = [False, False, False, True]
outputs = []

# Generate and check output
for test_input, correct_output in zip(test_inputs, correct_outputs):
    linear_combination = weight1 * test_input[0] + weight2 * test_input[1] + bias
    output = int(linear_combination >= 0)
    is_correct_string = 'Yes' if output == correct_output else 'No'
    outputs.append([test_input[0], test_input[1], linear_combination, output, is_correct_string])

# Print output
num_wrong = len([output[4] for output in outputs if output[4] == 'No'])
output_frame = pd.DataFrame(outputs, columns=['Input 1', '  Input 2', '  Linear Combination', 
                                              '  Activation Output', '  Is Correct'])
if not num_wrong:
    print('Nice!  You got it all correct.\n')
    print('You got {} wrong.  Keep trying!\n'.format(num_wrong))
Nice!  You got it all correct.

Input 1    Input 2    Linear Combination    Activation Output   Is Correct
      0          0                  -1.0                    0          Yes
      0          1                  -0.5                    0          Yes
      1          0                  -0.5                    0          Yes
      1          1                   0.0                    1          Yes
In [5]:
# TODO: Set weight1, weight2, and bias
weight1 = 0.0
weight2 = -1.0
bias = 0.0

# Inputs and outputs
test_inputs = [(0, 0), (0, 1), (1, 0), (1, 1)]
correct_outputs = [True, False, True, False]
outputs = []

# Generate and check output
for test_input, correct_output in zip(test_inputs, correct_outputs):
    linear_combination = weight1 * test_input[0] + weight2 * test_input[1] + bias
    output = int(linear_combination >= 0)
    is_correct_string = 'Yes' if output == correct_output else 'No'
    outputs.append([test_input[0], test_input[1], linear_combination, output, is_correct_string])

# Print output
num_wrong = len([output[4] for output in outputs if output[4] == 'No'])
output_frame = pd.DataFrame(outputs, columns=['Input 1', '  Input 2', 
                                              '  Linear Combination', '  Activation Output', '  Is Correct'])
if not num_wrong:
    print('Nice!  You got it all correct.\n')
    print('You got {} wrong.  Keep trying!\n'.format(num_wrong))
Nice!  You got it all correct.

Input 1    Input 2    Linear Combination    Activation Output   Is Correct
      0          0                   0.0                    1          Yes
      0          1                  -1.0                    0          Yes
      1          0                   0.0                    1          Yes
      1          1                  -1.0                    0          Yes
In [6]:
# Setting the random seed, feel free to change it and see different solutions.

def stepFunction(t):
    if t >= 0:
        return 1
    return 0

def prediction(X, W, b):
    return stepFunction((np.matmul(X,W)+b)[0])

# TODO: Fill in the code below to implement the perceptron trick.
# The function should receive as inputs the data X, the labels y,
# the weights W (as an array), and the bias b,
# update the weights and bias W, b, according to the perceptron algorithm,
# and return W and b.
def perceptronStep(X, y, W, b, learn_rate = 0.01):
    # Fill in code
    for i in range(len(X)):
        delta_y= y[i] - prediction(X[i],W,b)
        if delta_y == 1:
            for j in range(X.shape[1]):
                W[j] += X[i][j]*learn_rate
            b += learn_rate
        elif delta_y == -1:
            for j in range(X.shape[1]):
                W[j] -= X[i][j]*learn_rate
            b -= learn_rate
    return W, b
# This function runs the perceptron algorithm repeatedly on the dataset,
# and returns a few of the boundary lines obtained in the iterations,
# for plotting purposes.
# Feel free to play with the learning rate and the num_epochs,
# and see your results plotted below.
def trainPerceptronAlgorithm(X, y, learn_rate = 0.01, num_epochs = 25):
    x_min, x_max = min(X.T[0]), max(X.T[0])
    y_min, y_max = min(X.T[1]), max(X.T[1])
    W = np.array(np.random.rand(2,1))
    b = np.random.rand(1)[0] + x_max
    # These are the solution lines that get plotted below.
    boundary_lines = []
    for i in range(num_epochs):
        # In each epoch, we apply the perceptron step.
        W, b = perceptronStep(X, y, W, b, learn_rate)
        boundary_lines.append((-W[0]/W[1], -b/W[1]))
    return boundary_lines
In [17]:
def sigmoid(x): return 1/(1+np.e**(-x))
In [18]:
def function_example(x): return 4*x[0] + 5*x[1] - 9 
In [22]:
In [25]:
def softmax(x): 
    y = np.exp(x - np.max(x))
    return y / y.sum(axis=0)
In [26]:
# Write a function that takes as input a list of numbers, and returns
# the list of values given by the softmax function.
def softmax(L): 
    return 1.0*np.exp(L)/ np.exp(L).sum(axis=0)
In [28]:
def cross_entropy(Y, P):
    Y, P = np.float(Y), np.float(P)
    return -np.sum(Y * np.log(P) + (1 - Y) * np.log(1 - P))
In [32]:
# Setting the random seed, feel free to change it and see different solutions.

def sigmoid(x):
    return 1/(1+np.exp(-x))
def sigmoid_prime(x):
    return sigmoid(x)*(1-sigmoid(x))
def prediction(X, W, b):
    return sigmoid(np.matmul(X,W)+b)
def error_vector(y, y_hat):
    return [-y[i]*np.log(y_hat[i]) - (1-y[i])*np.log(1-y_hat[i]) for i in range(len(y))]
def error(y, y_hat):
    ev = error_vector(y, y_hat)
    return sum(ev)/len(ev)

# TODO: Fill in the code below to calculate the gradient of the error function.
# The result should be a list of three lists:
# The first list should contain the gradient (partial derivatives) with respect to w1
# The second list should contain the gradient (partial derivatives) with respect to w2
# The third list should contain the gradient (partial derivatives) with respect to b
def dErrors(X, y, y_hat):
    delta_y = [y[i]-y_hat[i] for i in range(len(y))]
    DErrorsDx1 = [-X[i][0]*(delta_y[i]) for i in range(len(X))]
    DErrorsDx2 = [-X[i][1]*(delta_y[i]) for i in range(len(X))]
    DErrorsDb = [-(delta_y[i]) for i in range(len(X))]
    return DErrorsDx1, DErrorsDx2, DErrorsDb

# TODO: Fill in the code below to implement the gradient descent step.
# The function should receive as inputs the data X, the labels y,
# the weights W (as an array), and the bias b.
# It should calculate the prediction, the gradients, and use them to
# update the weights and bias W, b. Then return W and b.
# The error e will be calculated and returned for you, for plotting purposes.
def gradientDescentStep(X, y, W, b, learn_rate = 0.01):
    # TODO: Calculate the prediction
    # TODO: Calculate the gradient
    # TODO: Update the weights
    # This calculates the error
    y_hat = prediction(X,W,b)
    e = error(y, y_hat)
    d_errors = dErrors(X, y, y_hat)
    W[0] -= sum(d_errors[0])*learn_rate
    W[1] -= sum(d_errors[1])*learn_rate
    b -= sum(d_errors[2])*learn_rate
    return W, b, e

# This function runs the perceptron algorithm repeatedly on the dataset,
# and returns a few of the boundary lines obtained in the iterations,
# for plotting purposes.
# Feel free to play with the learning rate and the num_epochs,
# and see your results plotted below.
def trainLR(X, y, learn_rate = 0.01, num_epochs = 100):
    x_min, x_max = min(X.T[0]), max(X.T[0])
    y_min, y_max = min(X.T[1]), max(X.T[1])
    # Initialize the weights randomly
    W = np.array(np.random.rand(2,1))*2 -1
    b = np.random.rand(1)[0]*2 - 1
    # These are the solution lines that get plotted below.
    boundary_lines = []
    errors = []
    for i in range(num_epochs):
        # In each epoch, we apply the gradient descent step.
        W, b, error = gradientDescentStep(X, y, W, b, learn_rate)
        boundary_lines.append((-W[0]/W[1], -b/W[1]))
    return boundary_lines, errors
In [37]:
sigmoid(3*0.4 + 5*0.6 + (-2.2))
In [48]:
(1 - sigmoid(1 + 1)) + (sigmoid(-1 -1) - 0)
In [49]:
(1 - sigmoid(10 + 10)) + (sigmoid(-10 - 10) - 0)
In [43]:
# Using TensorFlow 1.0.0; use tf.python_io in later versions
tf.python.control_flow_ops = tf

# Set random seed

# Our data
X = np.array([[0,0],[0,1],[1,0],[1,1]]).astype('float32')
y = np.array([[0],[1],[1],[0]]).astype('float32')

# Building the model
xor = Sequential()

# Add required layers
xor.add(Dense(64, input_dim=2, 
# Specify loss as "binary_crossentropy", optimizer as "adam",
# and add the accuracy metric

# Uncomment this line to print the model architecture

# Fitting the model
history =, y, epochs=50, verbose=0)

# Scoring the model
score = xor.evaluate(X, y)
print("\nAccuracy: ", score[-1])

# Checking the predictions
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 64)                192       
dense_4 (Dense)              (None, 1)                 65        
Total params: 257
Trainable params: 257
Non-trainable params: 0
4/4 [==============================] - 0s

Accuracy:  1.0

4/4 [==============================] - 0s
[[ 0.4941037 ]
 [ 0.52951396]
 [ 0.53265125]
 [ 0.46820125]]