From aacb2a5900eb78b2382ecd7df299dfbcc96588b8 Mon Sep 17 00:00:00 2001 From: Kyle Nakamura Date: Mon, 16 Sep 2024 16:19:15 -0700 Subject: [PATCH] tests: 100% coverage for .neural._NNBase --- src/mlrose_ky/neural/_nn_base.py | 42 ++----- tests/test_neural/test_nn_base.py | 198 +++++++++++++++++++++--------- 2 files changed, 146 insertions(+), 94 deletions(-) diff --git a/src/mlrose_ky/neural/_nn_base.py b/src/mlrose_ky/neural/_nn_base.py index 0bef1422..147caf24 100644 --- a/src/mlrose_ky/neural/_nn_base.py +++ b/src/mlrose_ky/neural/_nn_base.py @@ -22,46 +22,15 @@ class _NNBase(BaseEstimator, ABC): predicting with neural network models. """ - @abstractmethod - def __init__(self): - pass - @abstractmethod def fit(self, X: np.ndarray, y: np.ndarray = None, init_weights: np.ndarray = None): - """ - Fit the neural network to the data. - - Parameters - ---------- - X : np.ndarray - Numpy array containing the feature dataset with each row - representing a single observation. - y : np.ndarray, optional - Numpy array containing data labels. Length must be the same as - the length of X. - init_weights : np.ndarray, optional - Numpy array containing starting weights for the algorithm. - If None, a random state is used. - """ - pass + """Fit the neural network to the data.""" + raise NotImplementedError("Subclasses must implement fit method") @abstractmethod def predict(self, X: np.ndarray) -> np.ndarray: - """ - Use the model to predict data labels for a given feature array. - - Parameters - ---------- - X : np.ndarray - Numpy array containing the feature dataset with each row - representing a single observation. - - Returns - ------- - np.ndarray - Numpy array containing the predicted data labels. - """ - pass + """Use the model to predict data labels for a given feature array.""" + raise NotImplementedError("Subclasses must implement predict method") @staticmethod def _calculate_state_size(node_list: list[int]) -> int: @@ -227,6 +196,9 @@ def _predict( predicted_probs : np.ndarray or None Predicted probabilities for the input dataset, if the network is a classifier. """ + if not node_list: + raise ValueError("node_list cannot be empty.") + weights = list(unflatten_weights(fitted_weights, node_list)) if bias: diff --git a/tests/test_neural/test_nn_base.py b/tests/test_neural/test_nn_base.py index d048df6e..082f6536 100644 --- a/tests/test_neural/test_nn_base.py +++ b/tests/test_neural/test_nn_base.py @@ -4,82 +4,162 @@ # License: BSD 3-clause import numpy as np +import pytest -from tests.globals import sample_data -from mlrose_ky import flatten_weights, unflatten_weights, identity -from mlrose_ky.neural.fitness.network_weights import NetworkWeights -from mlrose_ky.opt_probs import ContinuousOpt -from mlrose_ky.algorithms.gd import gradient_descent +from mlrose_ky import NetworkWeights, ContinuousOpt # noinspection PyProtectedMember from mlrose_ky.neural._nn_base import _NNBase -# TODO: Add tests for _build_node_list(), _format_x_y_data(), _build_problem_and_fitness_function(), _predict(), and ensure 100% coverage. - class TestNNBase: - """Test cases for neural network-related utilities.""" - - def test_flatten_weights(self): - """Test flatten_weights function.""" - x = np.arange(12) - y = np.arange(6) - z = np.arange(16) + """Test cases for the neural network base class _NNBase.""" - a = np.reshape(x, (4, 3)) - b = np.reshape(y, (3, 2)) - c = np.reshape(z, (2, 8)) + def test_nn_base_instantiation_raises(self): + """Test that instantiating _NNBase raises TypeError due to abstract methods.""" + with pytest.raises(TypeError, match="Can't instantiate abstract class _NNBase with abstract methods fit, predict"): + _NNBase() - weights = [a, b, c] - flat = list(x) + list(y) + list(z) + def test_nn_base_abstract_methods(self): + """Test that calling abstract methods raises NotImplementedError.""" - assert np.array_equal(np.array(flatten_weights(weights)), np.array(flat)) + class TestNN(_NNBase): - def test_unflatten_weights(self): - """Test unflatten_weights function.""" - x = np.arange(12) - y = np.arange(6) - z = np.arange(16) + def fit(self, X, y=None, init_weights=None): + super().fit(X, y, init_weights) - a = np.reshape(x, (4, 3)) - b = np.reshape(y, (3, 2)) - c = np.reshape(z, (2, 8)) + def predict(self, X): + super().predict(X) - flat = list(x) + list(y) + list(z) - nodes = [4, 3, 2, 8] - weights = list(unflatten_weights(np.asarray(flat), nodes)) + nn = TestNN() + _X = np.array([[0]]) + _y = np.array([0]) - assert np.array_equal(weights[0], a) and np.array_equal(weights[1], b) and np.array_equal(weights[2], c) + with pytest.raises(NotImplementedError, match="Subclasses must implement fit method"): + nn.fit(_X, _y) - def test_gradient_descent(self, sample_data): - """Test gradient descent algorithm on sample data.""" - X, y_classifier, _, _ = sample_data - hidden_nodes = [2] - bias = False - node_list = [X.shape[1], *hidden_nodes, 2 if bias else 1] - fitness = NetworkWeights(X, y_classifier, node_list, activation=identity, bias=bias, is_classifier=False) + with pytest.raises(NotImplementedError, match="Subclasses must implement predict method"): + nn.predict(_X) - num_weights = _NNBase._calculate_state_size(node_list) - test_weights = np.ones(num_weights) + def test_calculate_state_size(self): + """Test _calculate_state_size static method.""" + node_list = [2, 3, 1] + expected_size = 2 * 3 + 3 * 1 # 6 + 3 = 9 + size = _NNBase._calculate_state_size(node_list) + assert size == expected_size - problem = ContinuousOpt(num_weights, fitness, maximize=False, min_val=-1) - test_fitness = -1 * problem.eval_fitness(test_weights) - best_state, best_fitness, _ = gradient_descent(problem) + node_list = [4] + size = _NNBase._calculate_state_size(node_list) + assert size == 0 - assert len(best_state) == num_weights and min(best_state) >= -1 and max(best_state) <= 1 and best_fitness < test_fitness + node_list = [] + size = _NNBase._calculate_state_size(node_list) + assert size == 0 - def test_gradient_descent_iter1(self, sample_data): - """Test gradient descent with one iteration.""" - X, y_classifier, _, _ = sample_data - hidden_nodes = [2] + def test_build_node_list(self): + """Test _build_node_list static method.""" + X = np.zeros((10, 5)) + y = np.zeros((10, 2)) + hidden_nodes = [4, 3] bias = False - node_list = [X.shape[1], *hidden_nodes, 2 if bias else 1] - fitness = NetworkWeights(X, y_classifier, node_list, activation=identity, bias=bias, is_classifier=False) - - num_weights = _NNBase._calculate_state_size(node_list) - problem = ContinuousOpt(num_weights, fitness, maximize=False, min_val=-1) - init_weights = np.ones(num_weights) - best_state, best_fitness, _ = gradient_descent(problem, max_iters=1, init_state=init_weights) - - x = np.array([-0.7, -0.7, -0.9, -0.9, -0.9, -0.9, -1, -1, -1, -1]) - assert np.allclose(best_state, x, atol=0.001) and round(best_fitness, 2) == 19.14 + node_list = _NNBase._build_node_list(X, y, hidden_nodes, bias) + expected_node_list = [5, 4, 3, 2] + assert node_list == expected_node_list + + bias = True + node_list = _NNBase._build_node_list(X, y, hidden_nodes, bias) + expected_node_list = [6, 4, 3, 2] + assert node_list == expected_node_list + + hidden_nodes = [] + node_list = _NNBase._build_node_list(X, y, hidden_nodes) + expected_node_list = [5, 2] + assert node_list == expected_node_list + + def test_format_x_y_data(self): + """Test _format_x_y_data static method.""" + X = np.array([[1, 2], [3, 4]]) + y = np.array([1, 0]) + X_formatted, y_formatted = _NNBase._format_x_y_data(X, y) + assert np.array_equal(X_formatted, X) + assert y_formatted.shape == (2, 1) + assert np.array_equal(y_formatted, np.array([[1], [0]])) + + y = np.array([[1], [0]]) + X_formatted, y_formatted = _NNBase._format_x_y_data(X, y) + assert np.array_equal(y_formatted, y) + + y = np.array([1]) + with pytest.raises(ValueError, match="The length of X \\(2\\) and y \\(1\\) must be equal."): + _NNBase._format_x_y_data(X, y) + + def test_build_problem_and_fitness_function(self): + """Test _build_problem_and_fitness_function static method.""" + X = np.array([[0, 1], [1, 0]]) + y = np.array([[1], [0]]) + node_list = [2, 2, 1] + + # noinspection PyMissingOrEmptyDocstring + def activation(x, deriv=False): + if deriv: + return np.ones_like(x) + return np.tanh(x) + + learning_rate = 0.1 + clip_max = 5.0 + bias = False + is_classifier = True + + fitness, problem = _NNBase._build_problem_and_fitness_function( + X, y, node_list, activation, learning_rate, clip_max, bias, is_classifier + ) + assert isinstance(fitness, NetworkWeights), "Fitness function is not of type NetworkWeights." + assert isinstance(problem, ContinuousOpt), "Problem is not of type ContinuousOpt." + assert problem.length == _NNBase._calculate_state_size(node_list), "Incorrect problem length." + assert problem.maximize == -1.0, "Problem should be a minimization problem." + assert problem.min_val == -clip_max, "Incorrect min_val in problem." + assert problem.max_val == clip_max, "Incorrect max_val in problem." + assert problem.step == learning_rate, "Incorrect step size in problem." + + def test_predict(self): + """Test _predict static method.""" + + # noinspection PyMissingOrEmptyDocstring + def input_activation(x): + return x + + # noinspection PyMissingOrEmptyDocstring + def output_activation(x): + return x + + X = np.array([[1, 2], [3, 4]]) + node_list = [2, 2, 1] + bias = False + is_classifier = True + total_weights = _NNBase._calculate_state_size(node_list) + fitted_weights = np.ones(total_weights) + + y_pred, predicted_probs = _NNBase._predict(X, fitted_weights, node_list, input_activation, output_activation, bias, is_classifier) + assert y_pred.shape == (2, 1) + assert predicted_probs.shape == (2, 1) + + # Test with bias + bias = True + node_list = [3, 2, 1] + total_weights = _NNBase._calculate_state_size(node_list) + fitted_weights = np.ones(total_weights) + y_pred, predicted_probs = _NNBase._predict(X, fitted_weights, node_list, input_activation, output_activation, bias, is_classifier) + assert y_pred.shape == (2, 1) + assert predicted_probs.shape == (2, 1) + + # Test for regression + is_classifier = False + y_pred, predicted_probs = _NNBase._predict(X, fitted_weights, node_list, input_activation, output_activation, bias, is_classifier) + assert y_pred.shape == (2, 1) + assert predicted_probs is None + + # Edge case: Empty node_list + node_list = [] + fitted_weights = np.array([]) + with pytest.raises(ValueError, match="node_list cannot be empty."): + _NNBase._predict(X, fitted_weights, node_list, input_activation, output_activation, bias, is_classifier)