Commit 618c0c1d authored by Zhangkai Wu

Delete ann_hands_on.ipynb

parent ea1c72ef

Hands_On/ANN/ann_hands_on.ipynb

deleted 100644 → 0
+0 −274
%% Cell type:code id:3e42ed4c-6f19-4ad3-a520-c4e31ce3d51b tags:

``` python
# NOTE:
# You may choose to use ChatGPT (or any AI-based tool) to assist with your assignment,
# but you must ensure that you fully understand the entire code.
# You are solely responsible for the work you submit.
# Please keep in mind: ChatGPT will not be available during the exam.
```

%% Cell type:code id:b219f95a-92a9-496c-a83d-576172854494 tags:

``` python
import numpy as np
import pandas
from sklearn.model_selection import train_test_split

epochs = 5000
learning_rate = 0.01

NN_ARCHITECTURE = [
    {"input_dim": 4, "output_dim": 8},
    {"input_dim": 8, "output_dim": 16},
    {"input_dim": 16, "output_dim": 1},
]
```

%% Cell type:code id:57e992c7-1309-42ca-972c-965663b037aa tags:

``` python
def init_layers(nn_architecture):
    # fix the random seed so the initialization is reproducible
    np.random.seed(1)
    # storage for the network parameters
    weights = {}

    # iteration over network layers
    for idx, layer in enumerate(nn_architecture):
        # we number network layers from 1
        layer_idx = idx + 1

        # number of units entering and leaving this layer
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]

        # initialize the weight matrix W and bias vector b
        # for this layer with small random values
        weights['W' + str(layer_idx)] = np.random.randn(
            layer_output_size, layer_input_size) * 0.1
        weights['b' + str(layer_idx)] = np.random.randn(
            layer_output_size, 1) * 0.1

    return weights
```
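
%% Cell type:markdown tags:

A quick sanity check, added here as an illustrative sketch rather than part of the original notebook: for the 4 → 8 → 16 → 1 architecture above, `init_layers` should return one weight matrix of shape `(output_dim, input_dim)` and one bias vector of shape `(output_dim, 1)` per layer.

%% Cell type:code tags:

``` python
# Illustrative shape check (assumes the cells above have been run).
demo_weights = init_layers(NN_ARCHITECTURE)
for idx in range(1, len(NN_ARCHITECTURE) + 1):
    print("W{}: {}  b{}: {}".format(idx, demo_weights["W" + str(idx)].shape,
                                    idx, demo_weights["b" + str(idx)].shape))
# Expected: W1 (8, 4), W2 (16, 8), W3 (1, 16), with biases (8, 1), (16, 1), (1, 1)
```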

%% Cell type:code id:425356cb-becc-4f49-be15-a9f454cfcfb0 tags:

``` python
def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))
```

%% Cell type:code id:40fade10-dffd-41a4-bfd8-cd8d7ca44373 tags:

``` python
def sigmoid_backward(dA, Z):
    sig = sigmoid(Z)
    return dA * sig * (1 - sig)
```
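
%% Cell type:markdown tags:

As a quick, illustrative verification (not part of the original notebook), the analytic derivative used in `sigmoid_backward`, sigmoid'(z) = sigmoid(z)(1 − sigmoid(z)), can be compared against a central finite difference:

%% Cell type:code tags:

``` python
# Compare the analytic sigmoid derivative with a numerical estimate
# (eps is an arbitrary small step; the difference should be close to zero).
z = np.linspace(-3.0, 3.0, 7)
eps = 1e-6
analytic = sigmoid_backward(np.ones_like(z), z)            # dA = 1 isolates sigmoid'(z)
numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)
print(np.max(np.abs(analytic - numeric)))                  # expected: ~1e-10 or smaller
```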

%% Cell type:code id:7252697f-0df6-4378-9087-617b700daab5 tags:

``` python
def single_layer_forward_propagation(A_prev, W_curr, b_curr):
    # pre-activation: linear transformation of the previous layer's activations
    Z_curr = np.dot(W_curr, A_prev) + b_curr

    # return both the activation A and the intermediate Z (needed for backprop)
    return sigmoid(Z_curr), Z_curr
```
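
%% Cell type:markdown tags:

One convention worth making explicit: examples are stored as columns, so a layer maps a `(input_dim, m)` batch to a `(output_dim, m)` batch. A small illustrative check with toy values (not from the original notebook):

%% Cell type:code tags:

``` python
# Shape check for a single forward step on a toy batch of 5 examples.
rng = np.random.default_rng(0)
A_demo = rng.standard_normal((4, 5))        # 4 features, 5 examples (columns)
W_demo = rng.standard_normal((8, 4)) * 0.1
b_demo = rng.standard_normal((8, 1)) * 0.1  # broadcast across the 5 columns
A_out, Z_out = single_layer_forward_propagation(A_demo, W_demo, b_demo)
print(A_out.shape, Z_out.shape)             # expected: (8, 5) (8, 5)
```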

%% Cell type:code id:72fe3975-85de-432d-b563-13331c283cdb tags:

``` python
def full_forward_propagation(X, weights, nn_architecture):
    # creating a temporary memory to store the information needed for a backward step
    memory = {}
    # X vector is the activation for input layer
    A_curr = X

    # iteration over network layers
    for idx, layer in enumerate(nn_architecture):
        # we number network layers from 1
        layer_idx = idx + 1
        # transfer the activation from the previous iteration
        A_prev = A_curr

        # extraction of W for the current layer
        W_curr = weights["W" + str(layer_idx)]
        # extraction of b for the current layer
        b_curr = weights["b" + str(layer_idx)]
        # calculation of activation for the current layer
        A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr)

        # saving calculated values in the memory
        memory["A" + str(idx)] = A_prev
        memory["Z" + str(layer_idx)] = Z_curr

    # return the prediction vector and the dictionary of intermediate values
    return A_curr, memory
```
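
%% Cell type:markdown tags:

An illustrative run of the full forward pass on a toy batch (assuming the cells above have been executed): the memory dictionary should hold the layer inputs `A0`–`A2` and the pre-activations `Z1`–`Z3` needed later by backpropagation.

%% Cell type:code tags:

``` python
# Toy forward pass: 4 input features, 5 examples.
rng = np.random.default_rng(1)
X_demo = rng.standard_normal((4, 5))
Y_hat_demo, memory_demo = full_forward_propagation(
    X_demo, init_layers(NN_ARCHITECTURE), NN_ARCHITECTURE)
print(Y_hat_demo.shape)            # expected: (1, 5)
print(sorted(memory_demo.keys()))  # expected: ['A0', 'A1', 'A2', 'Z1', 'Z2', 'Z3']
```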

%% Cell type:code id:98ce69b3-4bed-41db-8fc2-c64651adeb08 tags:

``` python
def get_loss_value(Y_hat, Y):
    # number of examples
    m = Y_hat.shape[1]
    # clip predictions away from exactly 0 and 1 so the logarithms stay finite
    Y_hat = np.clip(Y_hat, 1e-12, 1 - 1e-12)
    # binary cross-entropy loss, averaged over the m examples
    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))
    return np.squeeze(cost)
```
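
%% Cell type:markdown tags:

A small worked example with made-up values (illustrative, not part of the original notebook): for labels `[1, 0, 1]` and predictions `[0.9, 0.1, 0.8]`, the binary cross-entropy is −(ln 0.9 + ln 0.9 + ln 0.8) / 3 ≈ 0.1446.

%% Cell type:code tags:

``` python
# Worked binary cross-entropy example: confident, mostly correct
# predictions give a small loss.
Y_true = np.array([[1, 0, 1]])
Y_pred = np.array([[0.9, 0.1, 0.8]])
print(get_loss_value(Y_pred, Y_true))  # expected: ~0.1446
```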

%% Cell type:code id:427b4c41-f77b-4fff-af93-d6068de8ca40 tags:

``` python
def single_layer_backward_propagation(dA_curr, W_curr, Z_curr, A_prev):
    # number of examples
    m = A_prev.shape[1]

    # calculation of the activation function derivative
    dZ_curr = sigmoid_backward(dA_curr, Z_curr)

    # gradient of the loss with respect to W (averaged over the m examples)
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    # gradient of the loss with respect to b (averaged over the m examples)
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    # gradient passed back to the previous layer's activations
    dA_prev = np.dot(W_curr.T, dZ_curr)

    return dA_prev, dW_curr, db_curr
```
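
%% Cell type:markdown tags:

Another illustrative shape check with toy values (not part of the original notebook): each gradient mirrors the shape of the quantity it corresponds to, so `dW` matches `W`, `db` matches `b`, and `dA_prev` matches `A_prev`.

%% Cell type:code tags:

``` python
# Backward-step shape check for a layer with 4 inputs, 8 outputs, 5 examples.
rng = np.random.default_rng(4)
A_p  = rng.standard_normal((4, 5))
W_c  = rng.standard_normal((8, 4))
Z_c  = rng.standard_normal((8, 5))
dA_c = rng.standard_normal((8, 5))
dA_p, dW_c, db_c = single_layer_backward_propagation(dA_c, W_c, Z_c, A_p)
print(dA_p.shape, dW_c.shape, db_c.shape)  # expected: (4, 5) (8, 4) (8, 1)
```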

%% Cell type:code id:1e8d56a1 tags:

``` python
def full_backward_propagation(Y_hat, Y, memory, weights, nn_architecture):
    local_grads = {}

    # a hack ensuring the same shape of the prediction vector and labels vector
    Y = Y.reshape(Y_hat.shape)

    # derivative of the binary cross-entropy loss with respect to Y_hat
    dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))

    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        # we number network layers from 1
        layer_idx_curr = layer_idx_prev + 1

        dA_curr = dA_prev

        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_curr)]

        W_curr = weights["W" + str(layer_idx_curr)]

        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, Z_curr, A_prev)

        local_grads["dW" + str(layer_idx_curr)] = dW_curr
        local_grads["db" + str(layer_idx_curr)] = db_curr

    return local_grads
```
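
%% Cell type:markdown tags:

The standard way to gain confidence in a hand-written backward pass is a finite-difference gradient check. The sketch below (illustrative, not part of the original notebook) perturbs a single entry of `W1` and compares the resulting loss change against the analytic gradient from `full_backward_propagation`; the two values should agree to several decimal places.

%% Cell type:code tags:

``` python
# Finite-difference gradient check on one entry of W1.
rng = np.random.default_rng(2)
X_chk = rng.standard_normal((4, 5))
Y_chk = rng.integers(0, 2, size=(1, 5)).astype(float)
w_chk = init_layers(NN_ARCHITECTURE)

Y_hat_chk, mem_chk = full_forward_propagation(X_chk, w_chk, NN_ARCHITECTURE)
grads_chk = full_backward_propagation(Y_hat_chk, Y_chk, mem_chk, w_chk, NN_ARCHITECTURE)

eps = 1e-6
w_chk["W1"][0, 0] += eps
loss_plus = get_loss_value(full_forward_propagation(X_chk, w_chk, NN_ARCHITECTURE)[0], Y_chk)
w_chk["W1"][0, 0] -= 2 * eps
loss_minus = get_loss_value(full_forward_propagation(X_chk, w_chk, NN_ARCHITECTURE)[0], Y_chk)
w_chk["W1"][0, 0] += eps  # restore the original weight

numeric = (loss_plus - loss_minus) / (2 * eps)
print(numeric, grads_chk["dW1"][0, 0])  # the two numbers should match closely
```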

%% Cell type:code id:be97761f tags:

``` python
def update(weights, local_grads, nn_architecture, learning_rate):
    # iteration over network layers
    for layer_idx, layer in enumerate(nn_architecture, 1):
        weights["W" + str(layer_idx)] -= learning_rate * local_grads["dW" + str(layer_idx)]
        weights["b" + str(layer_idx)] -= learning_rate * local_grads["db" + str(layer_idx)]

    return weights
```

%% Cell type:code id:ec5b331c tags:

``` python
def train(X, Y, nn_architecture, epochs, learning_rate):
    # initialize the network parameters
    weights = init_layers(nn_architecture)
    # lists storing the history of metrics
    # recorded during training
    loss_history = []
    accuracy_history = []

    # main training loop
    for i in range(epochs):
        # step forward
        Y_hat, cache_memory = full_forward_propagation(X, weights, nn_architecture)

        # calculating metrics and saving them in history
        loss = get_loss_value(Y_hat, Y)
        loss_history.append(loss)
        accuracy = get_accuracy_value(Y_hat, Y)
        accuracy_history.append(accuracy)

        # step backward - calculating gradient
        local_grads = full_backward_propagation(Y_hat, Y, cache_memory, weights, nn_architecture)
        # updating model state
        weights = update(weights, local_grads, nn_architecture, learning_rate)

        if i % 50 == 0:
            print("Epoch: {:05} - loss: {:.5f} - accuracy: {:.5f}".format(i, loss, accuracy))

    return weights
```

%% Cell type:code id:0156c4d9 tags:

``` python
# an auxiliary function that converts probability into class
def convert_prob_into_class(probs):
    probs_ = np.copy(probs)
    probs_[probs_ > 0.5] = 1
    probs_[probs_ <= 0.5] = 0
    return probs_
```

%% Cell type:code id:41f6c833 tags:

``` python
def get_accuracy_value(Y_hat, Y):
    Y_hat_ = convert_prob_into_class(Y_hat)
    return (Y_hat_ == Y).all(axis=0).mean()
```
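
%% Cell type:markdown tags:

Two illustrative checks before the real run (toy data, not part of the original notebook): thresholding at 0.5 with per-example accuracy, and a short smoke test of the training loop on synthetic, linearly separable data, where the printed loss should trend downward.

%% Cell type:code tags:

``` python
# 1) Thresholding and accuracy on hand-picked values.
Y_demo = np.array([[1, 0, 1, 0]], dtype=float)
P_demo = np.array([[0.7, 0.4, 0.3, 0.6]])
print(convert_prob_into_class(P_demo))     # expected: [[1. 0. 0. 1.]]
print(get_accuracy_value(P_demo, Y_demo))  # expected: 0.5 (two of four correct)

# 2) Smoke test: synthetic data whose label depends only on the first feature.
rng = np.random.default_rng(3)
X_toy = rng.standard_normal((4, 40))
Y_toy = (X_toy[0:1, :] > 0).astype(float)
_ = train(X_toy, Y_toy, NN_ARCHITECTURE, epochs=200, learning_rate=0.5)
```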

%% Cell type:code id:b7db24cd tags:

``` python
if __name__ == "__main__":
    # Entry point: load the data, train the network, and evaluate it.
    # NOTE: Replace with the correct path to the iris.data.csv file on your system
    dataset = pandas.read_csv("/home/jovyan/work/iris.data.csv", header=None)

    # keep only two of the three iris classes so the task is binary;
    # .copy() avoids pandas' SettingWithCopyWarning on the assignments below
    filtered_dataset = dataset.loc[(dataset.iloc[:, 4] == "Iris-setosa") | (dataset.iloc[:, 4] == "Iris-virginica")].copy()

    # encode the class labels as integer category codes (0 / 1)
    filtered_dataset.iloc[:, 4] = filtered_dataset.iloc[:, 4].astype('category')
    cat_columns = filtered_dataset.select_dtypes(['category']).columns
    filtered_dataset[cat_columns] = filtered_dataset[cat_columns].apply(lambda x: x.cat.codes)

    X = filtered_dataset.iloc[:, :4]
    y = filtered_dataset.iloc[:, 4]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Training
    weights = train(np.transpose(X_train), np.transpose(y_train.to_numpy().reshape((y_train.shape[0], 1))),
                    NN_ARCHITECTURE, epochs, learning_rate)

    # Prediction
    Y_test_hat, cache = full_forward_propagation(np.transpose(X_test), weights, NN_ARCHITECTURE)

    # Accuracy achieved on the test set
    acc_test = get_accuracy_value(Y_test_hat, np.transpose(y_test.to_numpy().reshape((y_test.shape[0], 1))))
    print("Test set accuracy: {:.2f}".format(acc_test))
```
