from typing import Optional

import gradio as gr
import numpy as np

from nn.activation import ACTIVATIONS, Activation
from nn.loss import LOSSES, Loss

DTYPE = np.float32


class NN:
    def __init__(
        self,
        epochs: int,
        learning_rate: float,
        hidden_size: int,
        input_size: int,
        output_size: int,
        activation_fn: str,
        loss_fn: str,
        seed: int,
    ) -> None:
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.output_size = output_size
        self.seed = seed

        # try to get the activation function and loss function by name
        act_fn = ACTIVATIONS.get(activation_fn)
        if act_fn is None:
            raise KeyError(f"Invalid activation function '{activation_fn}'")
        loss = LOSSES.get(loss_fn)
        if loss is None:
            raise KeyError(f"Invalid loss function '{loss_fn}'")

        self._activation_fn: Activation = act_fn
        self._loss_fn: Loss = loss

        self._loss_history: list[float] = []
        self._weight_history: dict[str, list[float]] = {
            "wo": [],
            "wh": [],
            "bo": [],
            "bh": [],
        }

        self._wo: Optional[np.ndarray] = None
        self._wh: Optional[np.ndarray] = None
        self._bo: Optional[np.ndarray] = None
        self._bh: Optional[np.ndarray] = None
        self._init_weights_and_biases()

    def _init_weights_and_biases(self) -> None:
        """
        NN._init_weights_and_biases(): Should only be run once, right before the
        training loop, to initialize the weights and biases. Biases start at
        zero; weights are drawn randomly with He initialization (scaled by
        sqrt(2 / fan_in)).

        params: NN object with hidden layer size, output size, and input size defined.
        returns: None; modifies the _bh, _bo, _wo, and _wh attributes in place.
        """
        np.random.seed(self.seed)
        self._bh = np.zeros((1, self.hidden_size), dtype=DTYPE)
        self._bo = np.zeros((1, self.output_size), dtype=DTYPE)
        self._wh = np.asarray(
            np.random.randn(self.input_size, self.hidden_size)
            * np.sqrt(2 / self.input_size),
            dtype=DTYPE,
        )
        self._wo = np.asarray(
            np.random.randn(self.hidden_size, self.output_size)
            * np.sqrt(2 / self.hidden_size),
            dtype=DTYPE,
        )

    def _forward(self, X_train: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """
        _forward(X_train): run as the first step of each epoch during training.

        params: X_train: np.ndarray -> data that the NN is being trained on.
        returns: a (y_hat, hidden_layer_output) tuple -- the predicted outputs
        computed with the current epoch's weights and biases, plus the hidden
        layer activations (needed later for backpropagation).
        """
        assert self._activation_fn is not None
        # hidden layer
        hidden_layer_output = self._activation_fn.forward(
            np.dot(X_train, self._wh) + self._bh
        )
        # output layer (prediction layer)
        y_hat = self._activation_fn.forward(
            np.dot(hidden_layer_output, self._wo) + self._bo
        )
        return y_hat, hidden_layer_output

    def _backward(
        self,
        X_train: np.ndarray,
        y_hat: np.ndarray,
        y_train: np.ndarray,
        hidden_output: np.ndarray,
    ) -> None:
        assert self._activation_fn is not None
        assert self._wo is not None
        assert self._loss_fn is not None
        # Calculate the error at the output: the derivative of the loss
        # function with respect to the network's output, chained with the
        # activation derivative.
        error_output = self._loss_fn.backward(
            y_hat, y_train
        ) * self._activation_fn.backward(y_hat)
        # Update steps (gradient * learning rate) for output layer weights and biases
        wo_prime = np.dot(hidden_output.T, error_output) * self.learning_rate
        bo_prime = np.sum(error_output, axis=0, keepdims=True) * self.learning_rate
        # Propagate the error back to the hidden layer
        error_hidden = np.dot(error_output, self._wo.T) * self._activation_fn.backward(
            hidden_output
        )
        # Update steps for hidden layer weights and biases
        wh_prime = np.dot(X_train.T, error_hidden) * self.learning_rate
        bh_prime = np.sum(error_hidden, axis=0, keepdims=True) * self.learning_rate
        # Update weights and biases
        self._wo -= wo_prime
        self._wh -= wh_prime
        self._bo -= bo_prime
        self._bh -= bh_prime

    def train(self, X_train: np.ndarray, y_train: np.ndarray) -> "NN":
        assert self._loss_fn is not None
        for _ in gr.Progress().tqdm(range(self.epochs)):
            y_hat, hidden_output = self._forward(X_train=X_train)
            loss = self._loss_fn.forward(y_hat=y_hat, y_true=y_train)
            self._loss_history.append(loss)
            self._backward(
                X_train=X_train,
                y_hat=y_hat,
                y_train=y_train,
                hidden_output=hidden_output,
            )
            # keep track of a representative entry ([0, 0]) of each weight and
            # bias matrix at every epoch for visualization
            self._weight_history["wo"].append(self._wo[0, 0])
            self._weight_history["wh"].append(self._wh[0, 0])
            self._weight_history["bo"].append(self._bo[0, 0])
            self._weight_history["bh"].append(self._bh[0, 0])
        return self

    def predict(self, X_test: np.ndarray) -> np.ndarray:
        return self._forward(X_train=X_test)[0]