Commit c3d56691 authored by Zhangkai Wu's avatar Zhangkai Wu

Upload new file

parent b086926b

%% Cell type:code id:33cd0e26-8733-4431-ad2d-b0401c730b86 tags:

``` python
# NOTE:
# You may choose to use ChatGPT (or any AI-based tool) to assist with your assignment,
# but you must ensure that you fully understand the entire code.
# You are solely responsible for the work you submit.
# Please keep in mind: ChatGPT will not be available during the exam.
```

%% Cell type:code id:b5318366-2f17-4632-8a86-fbd29a6fc1dc tags:

``` python
# install seaborn if it is not already present
!pip install seaborn

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib import cm
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split


sns.set_style("whitegrid")


NN_ARCHITECTURE = [
    {"input_dim": 2, "output_dim": 25, "activation": "relu"},
    {"input_dim": 25, "output_dim": 50, "activation": "relu"},
    {"input_dim": 50, "output_dim": 25, "activation": "relu"},
    {"input_dim": 25, "output_dim": 1, "activation": "sigmoid"},
]
```

%% Output

    Requirement already satisfied: seaborn in /opt/conda/lib/python3.8/site-packages (0.13.2)

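%% Cell type:markdown tags:

Since each layer's input must match the previous layer's output, a quick consistency check (my addition, not part of the assignment) catches typos in `NN_ARCHITECTURE` before any training happens:

%% Cell type:code tags:

``` python
# Sanity check (illustrative): consecutive layers must have matching dimensions.
for prev_layer, curr_layer in zip(NN_ARCHITECTURE, NN_ARCHITECTURE[1:]):
    assert prev_layer["output_dim"] == curr_layer["input_dim"], \
        "dimension mismatch between consecutive layers"
```
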
%% Cell type:code id:09bdf04e tags:

``` python
def init_layers(nn_architecture, seed=99):
    # seed the RNG so that initialization is reproducible
    np.random.seed(seed)
    # dictionary storing the parameter values
    params_values = {}

    # iterate over the network layers
    for idx, layer in enumerate(nn_architecture):
        # network layers are numbered from 1
        layer_idx = idx + 1

        # extract the number of units in the layer
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]

        # initialize the W matrix and the b vector
        # for the current layer with small random values
        params_values['W' + str(layer_idx)] = np.random.randn(
            layer_output_size, layer_input_size) * 0.1
        params_values['b' + str(layer_idx)] = np.random.randn(
            layer_output_size, 1) * 0.1

    return params_values
```
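
%% Cell type:markdown tags:

A quick way to see what `init_layers` produces (illustrative, using the architecture defined above): each `W` matrix has shape `(output_dim, input_dim)` and each `b` vector has shape `(output_dim, 1)`.

%% Cell type:code tags:

``` python
# Illustrative only: inspect the shapes of the freshly initialized parameters.
demo_params = init_layers(NN_ARCHITECTURE)
for name, value in demo_params.items():
    print(name, value.shape)  # W1 (25, 2), b1 (25, 1), W2 (50, 25), ...
```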

%% Cell type:code id:6ef373b9 tags:

``` python
def sigmoid(Z):
    return 1/(1+np.exp(-Z))
```

%% Cell type:code id:be6b6e60 tags:

``` python
def relu(Z):
    return np.maximum(0,Z)
```

%% Cell type:code id:6b3dfb66 tags:

``` python
def sigmoid_backward(dA, Z):
    sig = sigmoid(Z)
    return dA * sig * (1 - sig)
```

%% Cell type:code id:7d0ac54b tags:

``` python
def relu_backward(dA, Z):
    dZ = np.array(dA, copy = True)
    dZ[Z <= 0] = 0
    return dZ
```
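
%% Cell type:markdown tags:

The two backward helpers implement the analytical derivatives of the activations. A centered finite-difference comparison (my addition, for illustration only) confirms `sigmoid_backward`; a similar check for `relu` would need to avoid `Z = 0`, where relu is not differentiable.

%% Cell type:code tags:

``` python
# Finite-difference check of sigmoid_backward (illustrative sketch).
Z_demo = np.array([[-1.5, -0.1, 0.2, 2.0]])
h = 1e-5
numerical = (sigmoid(Z_demo + h) - sigmoid(Z_demo - h)) / (2 * h)
analytical = sigmoid_backward(np.ones_like(Z_demo), Z_demo)
print(np.max(np.abs(numerical - analytical)))  # should be close to 0
```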

%% Cell type:code id:20b73b4c tags:

``` python
def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):
    # calculation of the input value for the activation function
    Z_curr = np.dot(W_curr, A_prev) + b_curr

    # selection of activation function
    if activation == "relu":
        activation_func = relu
    elif activation == "sigmoid":
        activation_func = sigmoid
    else:
        raise ValueError('Unsupported activation function')

    # return of calculated activation A and the intermediate Z matrix
    return activation_func(Z_curr), Z_curr
```

%% Cell type:code id:b503ea33 tags:

``` python
def full_forward_propagation(X, params_values, nn_architecture):
    # creating a temporary memory to store the information needed for a backward step
    memory = {}
    # the input X serves as the activation of layer 0
    A_curr = X

    # iteration over network layers
    for idx, layer in enumerate(nn_architecture):
        # we number network layers from 1
        layer_idx = idx + 1
        # transfer the activation from the previous iteration
        A_prev = A_curr

        # extraction of the activation function for the current layer
        activ_function_curr = layer["activation"]
        # extraction of W for the current layer
        W_curr = params_values["W" + str(layer_idx)]
        # extraction of b for the current layer
        b_curr = params_values["b" + str(layer_idx)]
        # calculation of activation for the current layer
        A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activ_function_curr)

        # saving calculated values in the memory
        memory["A" + str(idx)] = A_prev
        memory["Z" + str(layer_idx)] = Z_curr

    # return of prediction vector and a dictionary containing intermediate values
    return A_curr, memory
```
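
%% Cell type:markdown tags:

The convention throughout the notebook is that examples live in columns, so a batch of five 2-D points is a `(2, 5)` array. A small shape check (illustrative; reuses `init_layers` and `NN_ARCHITECTURE` from above):

%% Cell type:code tags:

``` python
# Illustrative only: push a random batch through the untrained network.
X_demo = np.random.randn(2, 5)
Y_demo, memory_demo = full_forward_propagation(X_demo, init_layers(NN_ARCHITECTURE), NN_ARCHITECTURE)
print(Y_demo.shape)         # (1, 5): one probability per example
print(sorted(memory_demo))  # ['A0', 'A1', 'A2', 'A3', 'Z1', 'Z2', 'Z3', 'Z4']
```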

%% Cell type:code id:9055e587 tags:

``` python
def get_loss_value(Y_hat, Y):
    # number of examples
    m = Y_hat.shape[1]
    # binary cross-entropy loss, averaged over the m examples
    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))
    return np.squeeze(cost)
```
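
%% Cell type:markdown tags:

One caveat: `np.log(Y_hat)` returns `-inf` if the network ever outputs exactly 0 or 1. A common guard (my own variant, not required by the assignment) is to clip the probabilities first:

%% Cell type:code tags:

``` python
# Numerically guarded variant of get_loss_value (illustrative sketch).
def get_loss_value_stable(Y_hat, Y, eps=1e-12):
    m = Y_hat.shape[1]
    Y_hat = np.clip(Y_hat, eps, 1 - eps)  # keep probabilities strictly inside (0, 1)
    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))
    return np.squeeze(cost)
```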

%% Cell type:code id:c0a61767 tags:

``` python
# an auxiliary function that converts probabilities into class labels
def convert_prob_into_class(probs):
    probs_ = np.copy(probs)
    probs_[probs_ > 0.5] = 1
    probs_[probs_ <= 0.5] = 0
    return probs_
```

%% Cell type:code id:eec09cca tags:

``` python
def get_accuracy_value(Y_hat, Y):
    Y_hat_ = convert_prob_into_class(Y_hat)
    return (Y_hat_ == Y).all(axis=0).mean()
```
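
%% Cell type:markdown tags:

A worked example of the metric: probabilities above 0.5 become class 1, so predictions `[0.9, 0.2, 0.6]` against labels `[1, 0, 0]` match on two of three examples.

%% Cell type:code tags:

``` python
# Illustrative only: expect 2/3 ≈ 0.667.
print(get_accuracy_value(np.array([[0.9, 0.2, 0.6]]), np.array([[1, 0, 0]])))
```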

%% Cell type:code id:78020a4e tags:

``` python
def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation="relu"):
    # number of examples
    m = A_prev.shape[1]

    # selection of activation function
    if activation == "relu":
        backward_activation_func = relu_backward
    elif activation == "sigmoid":
        backward_activation_func = sigmoid_backward
    else:
        raise ValueError('Unsupported activation function')

    # calculation of the activation function derivative
    dZ_curr = backward_activation_func(dA_curr, Z_curr)

    # derivative of the matrix W
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    # derivative of the vector b
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    # derivative of the matrix A_prev
    dA_prev = np.dot(W_curr.T, dZ_curr)

    return dA_prev, dW_curr, db_curr
```
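
%% Cell type:markdown tags:

In matrix form, the step above computes, for layer $l$ with $m$ examples:

$$dZ^{[l]} = dA^{[l]} \odot g'\!\left(Z^{[l]}\right), \qquad dW^{[l]} = \frac{1}{m}\, dZ^{[l]} A^{[l-1]\,T}, \qquad db^{[l]} = \frac{1}{m} \sum_{i=1}^{m} dZ^{[l](i)}, \qquad dA^{[l-1]} = W^{[l]\,T} dZ^{[l]}$$

where $g'$ is the derivative of the layer's activation function.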

%% Cell type:code id:3a073a8d tags:

``` python
def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):
    grads_values = {}

    # number of examples
    m = Y.shape[1]
    # a hack ensuring the same shape of the prediction vector and labels vector
    Y = Y.reshape(Y_hat.shape)

    # derivative of the binary cross-entropy loss with respect to the predictions
    dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))

    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        # we number network layers from 1
        layer_idx_curr = layer_idx_prev + 1
        # extraction of the activation function for the current layer
        activ_function_curr = layer["activation"]

        dA_curr = dA_prev

        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_curr)]

        W_curr = params_values["W" + str(layer_idx_curr)]
        b_curr = params_values["b" + str(layer_idx_curr)]

        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)

        grads_values["dW" + str(layer_idx_curr)] = dW_curr
        grads_values["db" + str(layer_idx_curr)] = db_curr

    return grads_values
```
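
%% Cell type:markdown tags:

A useful way to validate the whole backward pass (a sketch, my addition) is a centered finite-difference check: nudge one parameter entry, re-evaluate the loss, and compare the numerical slope against the analytical gradient. The two values should agree to several decimal places.

%% Cell type:code tags:

``` python
# Gradient check for a single parameter entry (slow; debugging only).
def gradient_check(X, Y, params, nn_architecture, key="W1", i=0, j=0, h=1e-6):
    Y_hat, memory = full_forward_propagation(X, params, nn_architecture)
    grads = full_backward_propagation(Y_hat, Y, memory, params, nn_architecture)

    params[key][i, j] += h
    loss_plus = get_loss_value(full_forward_propagation(X, params, nn_architecture)[0], Y)
    params[key][i, j] -= 2 * h
    loss_minus = get_loss_value(full_forward_propagation(X, params, nn_architecture)[0], Y)
    params[key][i, j] += h  # restore the original value

    numerical = (loss_plus - loss_minus) / (2 * h)
    return numerical, grads["d" + key][i, j]
```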

%% Cell type:code id:e8967fef tags:

``` python
def update(params_values, grads_values, nn_architecture, learning_rate):

    # iteration over network layers
    for layer_idx, layer in enumerate(nn_architecture, 1):
        params_values["W" + str(layer_idx)] -= learning_rate * grads_values["dW" + str(layer_idx)]
        params_values["b" + str(layer_idx)] -= learning_rate * grads_values["db" + str(layer_idx)]

    return params_values
```

%% Cell type:code id:a188fb6a tags:

``` python
def train(X, Y, nn_architecture, epochs, learning_rate, verbose=False, callback=None):
    # initialize the network parameters (fixed seed for reproducibility)
    params_values = init_layers(nn_architecture, 2)
    # lists storing the history of metrics
    # computed during the learning process
    loss_history = []
    accuracy_history = []

    # performing calculations for subsequent iterations
    for i in range(epochs):
        # step forward
        Y_hat, cache = full_forward_propagation(X, params_values, nn_architecture)

        # calculating metrics and saving them in history
        loss = get_loss_value(Y_hat, Y)
        loss_history.append(loss)
        accuracy = get_accuracy_value(Y_hat, Y)
        accuracy_history.append(accuracy)

        # step backward - calculating gradient
        grads_values = full_backward_propagation(Y_hat, Y, cache, params_values, nn_architecture)
        # updating model state
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)

        if i % 50 == 0:
            if verbose:
                print("Iteration: {:05} - loss: {:.5f} - accuracy: {:.5f}".format(i, loss, accuracy))
            if callback is not None:
                callback(i, params_values)

    return params_values
```
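
%% Cell type:markdown tags:

The `callback` hook fires every 50 epochs with the current parameters, which makes it easy to track held-out performance during training. A minimal sketch (`X_val` and `Y_val` are assumed names with the same features-in-rows layout as the training call below):

%% Cell type:code tags:

``` python
# Illustrative use of the callback hook; X_val and Y_val are assumed to exist
# with shapes (2, m_val) and (1, m_val) respectively.
val_history = []

def validation_callback(epoch, params):
    Y_val_hat, _ = full_forward_propagation(X_val, params, NN_ARCHITECTURE)
    val_history.append((epoch, get_accuracy_value(Y_val_hat, Y_val)))
```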

%% Cell type:code id:b931e96b tags:

``` python
# helper that plots a dataset and, optionally, a fitted decision boundary
def make_plot(X, y, plot_name, file_name=None, XX=None, YY=None, preds=None, dark=False):
    if (dark):
        plt.style.use('dark_background')
    else:
        sns.set_style("whitegrid")
    plt.figure(figsize=(16,12))
    axes = plt.gca()
    axes.set(xlabel="$X_1$", ylabel="$X_2$")
    plt.title(plot_name, fontsize=30)
    plt.subplots_adjust(left=0.20)
    plt.subplots_adjust(right=0.80)
    if(XX is not None and YY is not None and preds is not None):
        plt.contourf(XX, YY, preds.reshape(XX.shape), 25, alpha = 1, cmap=cm.Spectral)
        plt.contour(XX, YY, preds.reshape(XX.shape), levels=[.5], cmap="Greys", vmin=0, vmax=.6)
    plt.scatter(X[:, 0], X[:, 1], c=y.ravel(), s=40, cmap=plt.cm.Spectral, edgecolors='black')
    if(file_name):
        plt.savefig(file_name)
        plt.close()
```

%% Cell type:code id:4faef163-a86e-45ee-9331-69f608bf48fa tags:

``` python
# number of samples in the data set
N_SAMPLES = 1000
# ratio between training and test sets
TEST_SIZE = 0.1

X, y = make_moons(n_samples = N_SAMPLES, noise=0.2, random_state=100)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)

# Training
params_values = train(np.transpose(X_train), np.transpose(y_train.reshape((y_train.shape[0], 1))), NN_ARCHITECTURE, 10000, 0.01, verbose=True)

# Prediction
Y_test_hat, _ = full_forward_propagation(np.transpose(X_test), params_values, NN_ARCHITECTURE)

# Accuracy achieved on the test set
acc_test = get_accuracy_value(Y_test_hat, np.transpose(y_test.reshape((y_test.shape[0], 1))))
print("Test set accuracy: {:.2f} - David".format(acc_test))
```
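
%% Cell type:markdown tags:

With the trained `params_values` in hand, the `XX`/`YY`/`preds` arguments of `make_plot` can visualize the learned decision boundary: build a grid with `np.meshgrid`, run the flattened grid points through the network, and pass the probabilities along (a sketch; `GRID_RES` is my own name):

%% Cell type:code tags:

``` python
# Illustrative: plot the learned decision boundary over the test set.
GRID_RES = 100
xx, yy = np.meshgrid(
    np.linspace(X[:, 0].min() - 0.5, X[:, 0].max() + 0.5, GRID_RES),
    np.linspace(X[:, 1].min() - 0.5, X[:, 1].max() + 0.5, GRID_RES))
grid = np.stack([xx.ravel(), yy.ravel()], axis=0)  # shape (2, GRID_RES**2)
grid_preds, _ = full_forward_propagation(grid, params_values, NN_ARCHITECTURE)
make_plot(X_test, y_test, "Decision boundary on the test set", XX=xx, YY=yy, preds=grid_preds)
```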
