@Anderssorby
Last active November 16, 2017 01:09
Neural network using numpy on binary data
import numpy as np
from itertools import product
from timeit import default_timer as timer
import matplotlib.pyplot as plt
# Deterministic
np.random.seed(1234)
# Data
dim = 4
data_size = 2**dim
X = np.empty((data_size,dim))
Y = np.empty(data_size)
for n, x in enumerate(product(*([[0, 1]] * dim))):
    target = int(sum(x) == 2)  # the function we want to predict
    X[n] = x
    Y[n] = target
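# With dim = 4 this enumerates all 2**4 = 16 binary vectors; exactly
# C(4, 2) = 6 of them have two ones, so Y contains six positive targets.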
# Training data
training_size = data_size  # <= data_size
target1 = np.array(Y[:training_size])
input1 = np.array(X[:training_size])
# Model parameters
num_epochs = 100000
echos = 10
echo_freq = min(num_epochs // echos, 5000)
etha = 0.1 # learning rate
precision = 1e-4 # Stopping criterion
# Some activation functions and their derivatives
def sigmoid(a):
    return 1 / (1 + np.exp(-a))

def sigmoid_prime(a):
    return sigmoid(a) * (1 - sigmoid(a))

def id(z):
    return z

def id_prime(z):
    return 1

def ReLU(a):
    # Element-wise max(a, 0)
    return np.maximum(a, 0)

def ReLU_prime(a):
    # 1 where the input is positive, 0 otherwise
    return (a > 0).astype(float)
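# Quick sanity checks for the activations above (illustrative values only):
# sigmoid(0) = 0.5 and sigmoid_prime(0) = 0.25, while
# ReLU(np.array([-1.0, 2.0])) gives [0.0, 2.0] and ReLU_prime the mask [0.0, 1.0].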
# Activation function
h = sigmoid
h_prime = sigmoid_prime
activation_functions = [sigmoid, sigmoid, id]
act_prime = [sigmoid_prime, sigmoid_prime, id_prime]
# the output dimension of each layer
# including the input and output layer
layer_shape = [dim, 5, 10, 1]
num_layers = len(layer_shape) - 1
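# With layer_shape = [4, 5, 10, 1] the three weight matrices below get the
# shapes (5, 4), (10, 5) and (1, 10), and the bias vectors the shapes (5,),
# (10,) and (1,).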
# Initializing layers
layers = []
biases = []
for l in range(num_layers):
    layers.append(np.random.uniform(size=(layer_shape[l+1], layer_shape[l])))
    biases.append(np.random.uniform(size=layer_shape[l+1]))
def compute(inp):
    output = [inp]
    a = inp
    for l in range(num_layers):
        # For each layer apply the weights and bias, then the activation function h
        h = activation_functions[l]
        w = layers[l]
        z = np.dot(w, a) + biases[l]
        output.append(z)
        a = h(z)
    return output, a
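# Illustrative use of compute(): `out, result = compute(X[0])` returns the list
# [input, z_1, z_2, z_3] holding the input followed by each layer's
# pre-activation, plus the final network output `result`, here of shape (1,).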
# Training, i.e. minimize the error function with SGD
start = timer()
errors = []
wprev = layers.copy()
bprev = biases.copy()
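# The updates in the loop below are standard backpropagation for the squared
# error E = 1/2 * ||result - target||**2 with per-layer activations f_l:
#   delta_L   = (result - target) * f'_{L-1}(z_L)
#   dE/dW_l   = outer(delta_{l+1}, a_l),  dE/db_l = delta_{l+1}
#   delta_l   = W_l.T @ delta_{l+1} * f'_{l-1}(z_l)
# where a_l is the input activation of layer l and z_l its incoming
# pre-activation; every step is scaled by the learning rate etha.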
for epoch in range(num_epochs):
    shuffled = np.arange(training_size)
    np.random.shuffle(shuffled)
    sumerr = 0
    for t in shuffled:
        # Calculate the output for each layer
        output, result = compute(input1[t])
        # The sign is important
        error = result - target1[t]
        # Using the l2-norm
        sumerr += np.sqrt(sum(np.square(error)))
        # Backpropagation
        # Update the weights: w <- w - etha * dE(w)/dw
        # Start from the output error scaled by the learning rate and the
        # derivative of the output activation (identity here)
        delta = etha * error * act_prime[-1](output[-1])
        for l in reversed(range(num_layers)):
            w = layers[l]
            z = output[l]
            b = biases[l]
            # Input to layer l: the raw input for l == 0, otherwise the
            # previous layer's activation applied to its pre-activation
            a_in = z if l == 0 else activation_functions[l - 1](z)
            layers[l] = w - np.outer(delta, a_in)
            biases[l] = b - delta
            if l > 0:
                # Propagate the error backwards through the pre-update weights
                delta = w.T.dot(delta) * act_prime[l - 1](z)
    sumerr = sumerr / training_size
    errors.append(sumerr)
    # if len(errors) > 2 and errors[-1] - errors[-2] < 1e-5:
    #     print(f'\nStopping at epoch {epoch} with error {sumerr}')
    #     break
    if sumerr < precision:
        print(f'\nStopping at epoch {epoch} with error {sumerr}')
        break
    if epoch % echo_freq == 0:
        print(f'Epoch {epoch} - Current error: {sumerr}')
        diff_w = [np.sum(layers[l] - wprev[l]) for l in range(num_layers)]
        diff_b = [np.sum(biases[l] - bprev[l]) for l in range(num_layers)]
        diff = np.linalg.norm(diff_w) + np.linalg.norm(diff_b)
        print(f'Difference {diff}')
        wprev = layers.copy()
        bprev = biases.copy()
end_training = timer()
print(f'\nTraining took {end_training-start}s\n')
# Plotting convergence
plt.plot(errors)
plt.show()
# Testing
for i in range(data_size):
    output, result = compute(X[i])
    test_error = np.sum(np.abs(result - Y[i]))
    if i >= training_size:
        print("--- Test data ---")
    print(f'Test f({X[i]}) = {result}, target = {Y[i]}, error = {test_error}')
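# Optional illustrative sketch: a compact accuracy check, thresholding the
# scalar network output at 0.5 and comparing against the targets Y.
# predictions = np.array([compute(x)[1][0] > 0.5 for x in X])
# print(f'Accuracy: {np.mean(predictions == Y)}')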