"""Fully connected sigmoid network trained with batch gradient descent.

Recovered from the notebook ``Hands_On/ANN/ann_hands_on.ipynb``. The script
entry point trains a binary classifier on two Iris species.

Array conventions used throughout: a weight matrix ``W`` has shape
``(output_dim, input_dim)``, a bias ``b`` has shape ``(output_dim, 1)`` and
activations have shape ``(units, m)`` where ``m`` is the number of examples.
"""
# NOTE:
# You may choose to use ChatGPT (or any AI-based tool) to assist with your assignment,
# but you must ensure that you fully understand the entire code.
# You are solely responsible for the work you submit.
# Please keep in mind: ChatGPT will not be available during the exam.

import numpy as np
import pandas

epochs = 5000
learning_rate = 0.01

NN_ARCHITECTURE = [
    {"input_dim": 4, "output_dim": 8},
    {"input_dim": 8, "output_dim": 16},
    {"input_dim": 16, "output_dim": 1},
]

# Predictions are clipped to [EPSILON, 1 - EPSILON] before taking logs or
# dividing, so a saturated output cannot produce inf/nan.
EPSILON = 1e-12

# NOTE: Replace with the correct path to the iris.data.csv file on your system
DATA_PATH = "/home/jovyan/work/iris.data.csv"


def init_layers(nn_architecture, seed=1):
    """Create randomly initialised parameters for every layer.

    Args:
        nn_architecture: list of dicts with ``"input_dim"`` and
            ``"output_dim"`` keys, one per layer.
        seed: value passed to ``np.random.seed``; note that this reseeds
            NumPy's global generator.

    Returns:
        Dict with keys ``"W1"``, ``"b1"``, ``"W2"``, ... (layers are numbered
        from 1) holding arrays drawn from a normal distribution scaled by 0.1.
    """
    np.random.seed(seed)
    weights = {}
    for layer_idx, layer in enumerate(nn_architecture, 1):
        n_in = layer["input_dim"]
        n_out = layer["output_dim"]
        weights["W" + str(layer_idx)] = np.random.randn(n_out, n_in) * 0.1
        weights["b" + str(layer_idx)] = np.random.randn(n_out, 1) * 0.1
    return weights


def sigmoid(Z):
    """Return the element-wise logistic function of ``Z``.

    Only exponentials of non-positive numbers are evaluated, so large
    negative inputs do not overflow as ``1 / (1 + exp(-Z))`` would.
    """
    Z = np.asarray(Z, dtype=float)
    decay = np.exp(-np.abs(Z))
    return np.where(Z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))


def sigmoid_backward(dA, Z):
    """Return dL/dZ given dL/dA and the pre-activation ``Z``."""
    sig = sigmoid(Z)
    return dA * sig * (1 - sig)


def single_layer_forward_propagation(A_prev, W_curr, b_curr):
    """Apply one layer and return ``(activation, pre_activation)``."""
    Z_curr = np.dot(W_curr, A_prev) + b_curr
    return sigmoid(Z_curr), Z_curr


def full_forward_propagation(X, weights, nn_architecture):
    """Run the network on ``X``.

    Args:
        X: array-like of shape ``(input_dim, m)``.
        weights: parameter dict as produced by ``init_layers``.
        nn_architecture: layer description list (only its length is used).

    Returns:
        ``(Y_hat, memory)`` where ``memory`` maps ``"A<i-1>"`` to the input of
        layer ``i`` and ``"Z<i>"`` to its pre-activation, as needed by the
        backward pass.
    """
    memory = {}
    # Coerce once so that array-likes such as DataFrames are not cached as-is.
    A_curr = np.asarray(X, dtype=float)
    for layer_idx in range(1, len(nn_architecture) + 1):
        A_prev = A_curr
        A_curr, Z_curr = single_layer_forward_propagation(
            A_prev,
            weights["W" + str(layer_idx)],
            weights["b" + str(layer_idx)],
        )
        memory["A" + str(layer_idx - 1)] = A_prev
        memory["Z" + str(layer_idx)] = Z_curr
    return A_curr, memory


def get_loss_value(Y_hat, Y):
    """Return the mean binary cross-entropy between ``Y_hat`` and ``Y``.

    Both arguments have shape ``(1, m)``. Predictions are clipped away from
    0 and 1 so that the logarithms stay finite.
    """
    m = Y_hat.shape[1]
    Y_hat = np.clip(Y_hat, EPSILON, 1 - EPSILON)
    cost = -1 / m * (
        np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T)
    )
    return np.squeeze(cost)


def single_layer_backward_propagation(dA_curr, W_curr, Z_curr, A_prev):
    """Back-propagate through one layer.

    Returns:
        ``(dA_prev, dW_curr, db_curr)``; the parameter gradients are averaged
        over the ``m = A_prev.shape[1]`` examples.
    """
    m = A_prev.shape[1]
    dZ_curr = sigmoid_backward(dA_curr, Z_curr)
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    dA_prev = np.dot(W_curr.T, dZ_curr)
    return dA_prev, dW_curr, db_curr


def full_backward_propagation(Y_hat, Y, memory, weights, nn_architecture):
    """Compute the gradients of the cross-entropy loss for every layer.

    Args:
        Y_hat: network output from ``full_forward_propagation``.
        Y: labels; reshaped to the shape of ``Y_hat``.
        memory: cache returned by ``full_forward_propagation``.
        weights: parameter dict.
        nn_architecture: layer description list (only its length is used).

    Returns:
        Dict with keys ``"dW<i>"`` and ``"db<i>"`` for each layer ``i``.
    """
    local_grads = {}
    Y = np.asarray(Y).reshape(Y_hat.shape)
    # Clip so that a saturated prediction cannot cause a division by zero.
    Y_hat = np.clip(Y_hat, EPSILON, 1 - EPSILON)
    dA_prev = -(np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))

    # Walk the layers from the output back to the input.
    for layer_idx in range(len(nn_architecture), 0, -1):
        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_prev,
            weights["W" + str(layer_idx)],
            memory["Z" + str(layer_idx)],
            memory["A" + str(layer_idx - 1)],
        )
        local_grads["dW" + str(layer_idx)] = dW_curr
        local_grads["db" + str(layer_idx)] = db_curr
    return local_grads


def update(weights, local_grads, nn_architecture, learning_rate):
    """Apply one gradient-descent step in place and return ``weights``."""
    for layer_idx in range(1, len(nn_architecture) + 1):
        suffix = str(layer_idx)
        weights["W" + suffix] -= learning_rate * local_grads["dW" + suffix]
        weights["b" + suffix] -= learning_rate * local_grads["db" + suffix]
    return weights


def convert_prob_into_class(probs):
    """Return a copy of ``probs`` with values > 0.5 set to 1 and the rest to 0."""
    probs_ = np.copy(probs)
    probs_[probs_ > 0.5] = 1
    probs_[probs_ <= 0.5] = 0
    return probs_


def get_accuracy_value(Y_hat, Y):
    """Return the fraction of examples (columns) predicted correctly."""
    Y_hat_ = convert_prob_into_class(Y_hat)
    return (Y_hat_ == Y).all(axis=0).mean()


def train(X, Y, nn_architecture, epochs, learning_rate, *, return_history=False):
    """Train the network with full-batch gradient descent.

    Args:
        X: inputs of shape ``(input_dim, m)``.
        Y: labels of shape ``(1, m)``.
        nn_architecture: layer description list.
        epochs: number of gradient steps.
        learning_rate: step size.
        return_history: when true, also return the per-epoch metrics.

    Returns:
        The trained parameter dict, or ``(weights, history)`` when
        ``return_history`` is true, where ``history`` has the keys ``"loss"``
        and ``"accuracy"``.
    """
    Y = np.asarray(Y, dtype=float)
    weights = init_layers(nn_architecture)
    loss_history = []
    accuracy_history = []

    for i in range(epochs):
        Y_hat, cache_memory = full_forward_propagation(X, weights, nn_architecture)

        # Metrics are measured before this epoch's update is applied.
        loss = get_loss_value(Y_hat, Y)
        loss_history.append(loss)
        accuracy = get_accuracy_value(Y_hat, Y)
        accuracy_history.append(accuracy)

        local_grads = full_backward_propagation(
            Y_hat, Y, cache_memory, weights, nn_architecture)
        weights = update(weights, local_grads, nn_architecture, learning_rate)

        if i % 50 == 0:
            print("Epoch: {:05} - loss: {:.5f} - accuracy: {:.5f}".format(i, loss, accuracy))

    if return_history:
        return weights, {"loss": loss_history, "accuracy": accuracy_history}
    return weights


def main(data_path=DATA_PATH):
    """Train on Iris setosa vs. virginica and print the test-set accuracy."""
    # Imported here because only this entry point needs scikit-learn; the
    # functions above depend on NumPy alone.
    from sklearn.model_selection import train_test_split

    dataset = pandas.read_csv(data_path, header=None)
    species = dataset.iloc[:, 4]
    subset = dataset.loc[species.isin(["Iris-setosa", "Iris-virginica"])]

    X = subset.iloc[:, :4].to_numpy(dtype=float)
    # Explicit 0/1 encoding (setosa -> 0, virginica -> 1). This avoids
    # assigning a categorical back into a filtered slice, which is chained
    # assignment and whose resulting dtype may depend on the pandas version.
    y = (subset.iloc[:, 4] == "Iris-virginica").to_numpy(dtype=float)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Training
    weights = train(X_train.T, y_train.reshape(1, -1),
                    NN_ARCHITECTURE, epochs, learning_rate)

    # Prediction
    Y_test_hat, _ = full_forward_propagation(X_test.T, weights, NN_ARCHITECTURE)

    # Accuracy achieved on the test set
    acc_test = get_accuracy_value(Y_test_hat, y_test.reshape(1, -1))
    print("Test set accuracy: {:.2f}".format(acc_test))


if __name__ == "__main__":
    main()