We generate the data with scikit-learn's make_circles function, split it into training and test sets, and plot it with Matplotlib.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles

n_samples = 1000
n_features = 2
n_outputs = 1

X, y = make_circles(n_samples = n_samples, factor = .01, noise = .2)

n_TRAIN = int(.75 * n_samples)
X_train = X[0:n_TRAIN, :]
y_train = y[0:n_TRAIN]
X_test = X[n_TRAIN:n_samples, :]
y_test = y[n_TRAIN:n_samples]

fig = plt.figure(figsize=(8, 8))
plt.scatter(X[:, 0], X[:, 1], c = y)
plt.xlabel("X1")
plt.ylabel("X2")
plt.savefig('nn_plot.pdf', bbox_inches='tight')
class NN():
    def __init__(self, n_samples, n_features, n_outputs, n_hidden = 1):
        self.n_samples = n_samples
        self.n_features = n_features
        self.n_hidden = n_hidden
        self.n_outputs = n_outputs

        self.W_h = np.random.randn(n_features, n_hidden)
        self.b_h = .01 + np.zeros((1, n_hidden))
        self.W_o = np.random.randn(n_hidden, n_outputs)
        self.b_o = .01 + np.zeros((1, n_outputs))

    def sigmoid(self, x):
        return 1/(1 + np.exp(-x))

    def loss(self, y, p_pred):
        return -1/y.shape[0] * (np.sum(y * np.log(p_pred) + (1 - y) * (np.log(1 - p_pred))))

    def predict(self, X):
        return np.squeeze(np.round(self.forward_prop(X)["O"]))

    def forward_prop(self, X):
        # Hidden layer
        A_h = X @ self.W_h + self.b_h
        H = self.sigmoid(A_h)

        # Output layer
        A_o = H @ self.W_o + self.b_o
        O = self.sigmoid(A_o)

        return {
            "A_h": A_h,
            "H": H,
            "A_o": A_o,
            "O": O
        }

    # This is not a general implementation of backprop; the gradients are
    # derived by hand for this specific one-hidden-layer architecture.
    def backward_prop(self, X, y_, forward):
        one_n = np.ones(self.n_samples)
        y = (y_[np.newaxis]).T  # convert to column vector

        dA_o = (y - forward["O"])
        dL_dW_o = 1/self.n_samples * forward["H"].T @ dA_o
        dL_db_o = 1/self.n_samples * one_n.T @ dA_o

        dA_h = (dA_o @ self.W_o.T) * (self.sigmoid(forward["A_h"]) * (1 - self.sigmoid(forward["A_h"])))
        dL_dW_h = 1/self.n_samples * X.T @ dA_h
        dL_db_h = 1/self.n_samples * one_n.T @ dA_h

        return {
            "dL_dW_h": dL_dW_h,
            "dL_db_h": dL_db_h,
            "dL_dW_o": dL_dW_o,
            "dL_db_o": dL_db_o
        }

    def train(self, X, y, learning_rate = .5, max_iter = 1001):
        for i in range(0, max_iter):
            forward_prop_dict = self.forward_prop(X)
            G = self.backward_prop(X, y, forward_prop_dict)

            # Gradient step: G stores the negative gradients of the loss,
            # so adding them performs gradient descent.
            self.W_h = self.W_h + learning_rate * G["dL_dW_h"]
            self.b_h = self.b_h + learning_rate * G["dL_db_h"]
            self.W_o = self.W_o + learning_rate * G["dL_dW_o"]
            self.b_o = self.b_o + learning_rate * G["dL_db_o"]

            if i % 100 == 0:
                print(f"Iteration: {i}, Training Loss: {self.loss(y, np.squeeze(forward_prop_dict['O']))}")

We use $10$ hidden units in the hidden layer and report the 0-1 accuracy on both the training and test sets.
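For reference, forward_prop and backward_prop implement the following forward pass and hand-derived gradients of the cross-entropy loss, with $\sigma$ the sigmoid, $n$ the number of training samples, $\mathbf{1}$ the all-ones vector, and $\odot$ the elementwise product:

$$A_h = X W_h + b_h, \quad H = \sigma(A_h), \quad A_o = H W_o + b_o, \quad O = \sigma(A_o),$$

$$L(y, O) = -\frac{1}{n} \sum_{i=1}^{n} \left( y_i \log O_i + (1 - y_i) \log(1 - O_i) \right),$$

$$\frac{\partial L}{\partial W_o} = \frac{1}{n} H^\top (O - y), \qquad \frac{\partial L}{\partial b_o} = \frac{1}{n} \mathbf{1}^\top (O - y),$$

$$\frac{\partial L}{\partial W_h} = \frac{1}{n} X^\top \left[ (O - y) W_o^\top \odot H \odot (1 - H) \right], \qquad \frac{\partial L}{\partial b_h} = \frac{1}{n} \mathbf{1}^\top \left[ (O - y) W_o^\top \odot H \odot (1 - H) \right].$$

The dictionary returned by backward_prop holds the negatives of these gradients, which is why the gradient step adds them: adding the negative gradient scaled by the learning rate is gradient descent on $L$.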
nn = NN(n_samples = n_TRAIN, n_features = n_features, n_outputs = n_outputs, n_hidden = 10)
nn.train(X_train, y_train)

print("Train accuracy:", 1/X_train.shape[0] * np.sum(nn.predict(X_train) == y_train))
print("Test accuracy:", 1/X_test.shape[0] * np.sum(nn.predict(X_test) == y_test))
Iteration: 0, Training Loss: 0.7119608071592811
Iteration: 100, Training Loss: 0.639833196437629
Iteration: 200, Training Loss: 0.5215796424999427
Iteration: 300, Training Loss: 0.368528633036507
Iteration: 400, Training Loss: 0.25561713311943096
Iteration: 500, Training Loss: 0.1887819942694729
Iteration: 600, Training Loss: 0.14915555078792483
Iteration: 700, Training Loss: 0.12416122322345967
Iteration: 800, Training Loss: 0.10734573047261009
Iteration: 900, Training Loss: 0.0953975198921105
Iteration: 1000, Training Loss: 0.08652396074499336
Train accuracy: 0.9906666666666666
Test accuracy: 0.992
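To see what the network has learned, one can also plot its predictions over a grid of inputs. This is a minimal sketch, not part of the original code, reusing the trained nn object, the training data, and Matplotlib; the grid range is an assumption chosen to cover the circles.

# Evaluate the trained network on a grid covering the data and plot the
# predicted class for each grid point along with the training points.
xx, yy = np.meshgrid(np.linspace(-1.5, 1.5, 200), np.linspace(-1.5, 1.5, 200))
grid = np.column_stack([xx.ravel(), yy.ravel()])
Z = nn.predict(grid).reshape(xx.shape)

fig = plt.figure(figsize=(8, 8))
plt.contourf(xx, yy, Z, alpha = .3)
plt.scatter(X_train[:, 0], X_train[:, 1], c = y_train)
plt.xlabel("X1")
plt.ylabel("X2")
plt.savefig('nn_decision_boundary.pdf', bbox_inches='tight')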
References.
https://github.com/zotroneneis/machine_learning_basics/blob/master/simple_neural_net.ipynb

Cybenko, G. (1989) "Approximation by superpositions of a sigmoidal function", Mathematics of Control, Signals, and Systems, 2(4), 303–314. doi:10.1007/BF02551274

Hornik, K. (1991) "Approximation Capabilities of Multilayer Feedforward Networks", Neural Networks, 4(2), 251–257. doi:10.1016/0893-6080(91)90009-T