From aacb2a5900eb78b2382ecd7df299dfbcc96588b8 Mon Sep 17 00:00:00 2001
From: Kyle Nakamura <knakamura13dev@gmail.com>
Date: Mon, 16 Sep 2024 16:19:15 -0700
Subject: [PATCH] tests: 100% coverage for .neural._NNBase

---
 src/mlrose_ky/neural/_nn_base.py  |  42 ++-----
 tests/test_neural/test_nn_base.py | 198 +++++++++++++++++++++---------
 2 files changed, 146 insertions(+), 94 deletions(-)

diff --git a/src/mlrose_ky/neural/_nn_base.py b/src/mlrose_ky/neural/_nn_base.py
index 0bef1422..147caf24 100644
--- a/src/mlrose_ky/neural/_nn_base.py
+++ b/src/mlrose_ky/neural/_nn_base.py
@@ -22,46 +22,15 @@ class _NNBase(BaseEstimator, ABC):
     predicting with neural network models.
     """
 
-    @abstractmethod
-    def __init__(self):
-        pass
-
     @abstractmethod
     def fit(self, X: np.ndarray, y: np.ndarray = None, init_weights: np.ndarray = None):
-        """
-        Fit the neural network to the data.
-
-        Parameters
-        ----------
-        X : np.ndarray
-            Numpy array containing the feature dataset with each row
-            representing a single observation.
-        y : np.ndarray, optional
-            Numpy array containing data labels. Length must be the same as
-            the length of X.
-        init_weights : np.ndarray, optional
-            Numpy array containing starting weights for the algorithm.
-            If None, a random state is used.
-        """
-        pass
+        """Fit the neural network to the data (abstract: this base body only raises NotImplementedError)."""
+        raise NotImplementedError("Subclasses must implement fit method")
 
     @abstractmethod
     def predict(self, X: np.ndarray) -> np.ndarray:
-        """
-        Use the model to predict data labels for a given feature array.
-
-        Parameters
-        ----------
-        X : np.ndarray
-            Numpy array containing the feature dataset with each row
-            representing a single observation.
-
-        Returns
-        -------
-        np.ndarray
-            Numpy array containing the predicted data labels.
-        """
-        pass
+        """Predict data labels for a feature array (abstract: this base body only raises NotImplementedError)."""
+        raise NotImplementedError("Subclasses must implement predict method")
 
     @staticmethod
     def _calculate_state_size(node_list: list[int]) -> int:
@@ -227,6 +196,9 @@ def _predict(
         predicted_probs : np.ndarray or None
             Predicted probabilities for the input dataset, if the network is a classifier.
         """
+        if not node_list:
+            raise ValueError("node_list cannot be empty.")
+
         weights = list(unflatten_weights(fitted_weights, node_list))
 
         if bias:
diff --git a/tests/test_neural/test_nn_base.py b/tests/test_neural/test_nn_base.py
index d048df6e..082f6536 100644
--- a/tests/test_neural/test_nn_base.py
+++ b/tests/test_neural/test_nn_base.py
@@ -4,82 +4,162 @@
 # License: BSD 3-clause
 
 import numpy as np
+import pytest
 
-from tests.globals import sample_data
-from mlrose_ky import flatten_weights, unflatten_weights, identity
-from mlrose_ky.neural.fitness.network_weights import NetworkWeights
-from mlrose_ky.opt_probs import ContinuousOpt
-from mlrose_ky.algorithms.gd import gradient_descent
+from mlrose_ky import NetworkWeights, ContinuousOpt
 
 # noinspection PyProtectedMember
 from mlrose_ky.neural._nn_base import _NNBase
 
-# TODO: Add tests for _build_node_list(), _format_x_y_data(), _build_problem_and_fitness_function(), _predict(), and ensure 100% coverage.
-
 
 class TestNNBase:
-    """Test cases for neural network-related utilities."""
-
-    def test_flatten_weights(self):
-        """Test flatten_weights function."""
-        x = np.arange(12)
-        y = np.arange(6)
-        z = np.arange(16)
+    """Test cases for the neural network base class _NNBase."""
 
-        a = np.reshape(x, (4, 3))
-        b = np.reshape(y, (3, 2))
-        c = np.reshape(z, (2, 8))
+    def test_nn_base_instantiation_raises(self):
+        """Instantiating _NNBase must raise TypeError naming fit and predict (exact wording varies by Python version)."""
+        with pytest.raises(TypeError, match=r"Can't instantiate abstract class _NNBase\b.*fit.*predict"):
+            _NNBase()
 
-        weights = [a, b, c]
-        flat = list(x) + list(y) + list(z)
+    def test_nn_base_abstract_methods(self):
+        """Test that the base fit/predict bodies raise NotImplementedError when a subclass reaches them via super()."""
+
-        assert np.array_equal(np.array(flatten_weights(weights)), np.array(flat))
+        class TestNN(_NNBase):
 
-    def test_unflatten_weights(self):
-        """Test unflatten_weights function."""
-        x = np.arange(12)
-        y = np.arange(6)
-        z = np.arange(16)
+            def fit(self, X, y=None, init_weights=None):
+                super().fit(X, y, init_weights)
 
-        a = np.reshape(x, (4, 3))
-        b = np.reshape(y, (3, 2))
-        c = np.reshape(z, (2, 8))
+            def predict(self, X):
+                super().predict(X)
 
-        flat = list(x) + list(y) + list(z)
-        nodes = [4, 3, 2, 8]
-        weights = list(unflatten_weights(np.asarray(flat), nodes))
+        nn = TestNN()
+        _X = np.array([[0]])
+        _y = np.array([0])
 
-        assert np.array_equal(weights[0], a) and np.array_equal(weights[1], b) and np.array_equal(weights[2], c)
+        with pytest.raises(NotImplementedError, match="Subclasses must implement fit method"):
+            nn.fit(_X, _y)
 
-    def test_gradient_descent(self, sample_data):
-        """Test gradient descent algorithm on sample data."""
-        X, y_classifier, _, _ = sample_data
-        hidden_nodes = [2]
-        bias = False
-        node_list = [X.shape[1], *hidden_nodes, 2 if bias else 1]
-        fitness = NetworkWeights(X, y_classifier, node_list, activation=identity, bias=bias, is_classifier=False)
+        with pytest.raises(NotImplementedError, match="Subclasses must implement predict method"):
+            nn.predict(_X)
 
-        num_weights = _NNBase._calculate_state_size(node_list)
-        test_weights = np.ones(num_weights)
+    def test_calculate_state_size(self):
+        """Test _calculate_state_size for a three-layer, a single-layer, and an empty node list."""
+        node_list = [2, 3, 1]
+        expected_size = 2 * 3 + 3 * 1  # 6 + 3 = 9
+        size = _NNBase._calculate_state_size(node_list)
+        assert size == expected_size
 
-        problem = ContinuousOpt(num_weights, fitness, maximize=False, min_val=-1)
-        test_fitness = -1 * problem.eval_fitness(test_weights)
-        best_state, best_fitness, _ = gradient_descent(problem)
+        node_list = [4]  # a single layer has no layer-to-layer connections
+        size = _NNBase._calculate_state_size(node_list)
+        assert size == 0
 
-        assert len(best_state) == num_weights and min(best_state) >= -1 and max(best_state) <= 1 and best_fitness < test_fitness
+        node_list = []  # an empty node list is expected to yield size 0
+        size = _NNBase._calculate_state_size(node_list)
+        assert size == 0
 
-    def test_gradient_descent_iter1(self, sample_data):
-        """Test gradient descent with one iteration."""
-        X, y_classifier, _, _ = sample_data
-        hidden_nodes = [2]
+    def test_build_node_list(self):
+        """Test _build_node_list without bias, with bias, and with no hidden layers (bias argument omitted)."""
+        X = np.zeros((10, 5))
+        y = np.zeros((10, 2))
+        hidden_nodes = [4, 3]
         bias = False
-        node_list = [X.shape[1], *hidden_nodes, 2 if bias else 1]
-        fitness = NetworkWeights(X, y_classifier, node_list, activation=identity, bias=bias, is_classifier=False)
-
-        num_weights = _NNBase._calculate_state_size(node_list)
-        problem = ContinuousOpt(num_weights, fitness, maximize=False, min_val=-1)
-        init_weights = np.ones(num_weights)
-        best_state, best_fitness, _ = gradient_descent(problem, max_iters=1, init_state=init_weights)
-
-        x = np.array([-0.7, -0.7, -0.9, -0.9, -0.9, -0.9, -1, -1, -1, -1])
-        assert np.allclose(best_state, x, atol=0.001) and round(best_fitness, 2) == 19.14
+        node_list = _NNBase._build_node_list(X, y, hidden_nodes, bias)
+        expected_node_list = [5, 4, 3, 2]
+        assert node_list == expected_node_list
+
+        bias = True
+        node_list = _NNBase._build_node_list(X, y, hidden_nodes, bias)
+        expected_node_list = [6, 4, 3, 2]  # input layer is one larger than X's 5 columns when bias is on
+        assert node_list == expected_node_list
+
+        hidden_nodes = []
+        node_list = _NNBase._build_node_list(X, y, hidden_nodes)  # bias argument left at its default
+        expected_node_list = [5, 2]
+        assert node_list == expected_node_list
+
+    def test_format_x_y_data(self):
+        """Check _format_x_y_data: 1-D labels become a column, 2-D labels pass through, a length mismatch raises."""
+        features = np.array([[1, 2], [3, 4]])
+        flat_labels = np.array([1, 0])
+        out_features, out_labels = _NNBase._format_x_y_data(features, flat_labels)
+        assert np.array_equal(out_features, features)
+        assert out_labels.shape == (2, 1)
+        assert np.array_equal(out_labels, np.array([[1], [0]]))
+
+        column_labels = np.array([[1], [0]])
+        _, out_labels = _NNBase._format_x_y_data(features, column_labels)
+        assert np.array_equal(out_labels, column_labels)
+
+        short_labels = np.array([1])
+        with pytest.raises(ValueError, match="The length of X \\(2\\) and y \\(1\\) must be equal."):
+            _NNBase._format_x_y_data(features, short_labels)
+
+    def test_build_problem_and_fitness_function(self):
+        """Check that _build_problem_and_fitness_function returns a configured (NetworkWeights, ContinuousOpt) pair."""
+        features = np.array([[0, 1], [1, 0]])
+        labels = np.array([[1], [0]])
+        layers = [2, 2, 1]
+        step_size = 0.1
+        weight_bound = 5.0
+        use_bias = False
+        classify = True
+
+        # noinspection PyMissingOrEmptyDocstring
+        def activation(x, deriv=False):
+            return np.ones_like(x) if deriv else np.tanh(x)
+
+        fitness, problem = _NNBase._build_problem_and_fitness_function(
+            features, labels, layers, activation,
+            step_size, weight_bound, use_bias, classify,
+        )
+
+        # The problem must be a minimization sized to the network, bounded by +/- clip_max, stepping by the learning rate.
+        assert isinstance(fitness, NetworkWeights), "Fitness function is not of type NetworkWeights."
+        assert isinstance(problem, ContinuousOpt), "Problem is not of type ContinuousOpt."
+        assert problem.length == _NNBase._calculate_state_size(layers), "Incorrect problem length."
+        assert problem.maximize == -1.0, "Problem should be a minimization problem."
+        assert problem.min_val == -weight_bound, "Incorrect min_val in problem."
+        assert problem.max_val == weight_bound, "Incorrect max_val in problem."
+        assert problem.step == step_size, "Incorrect step size in problem."
+
+    def test_predict(self):
+        """Check _predict output shapes for classifier/regressor runs, with and without bias, plus the empty node_list error."""
+
+        # noinspection PyMissingOrEmptyDocstring
+        def input_activation(x):
+            return x
+
+        # noinspection PyMissingOrEmptyDocstring
+        def output_activation(x):
+            return x
+
+        # noinspection PyMissingOrEmptyDocstring
+        def run(weights, layers, use_bias, classify):
+            return _NNBase._predict(features, weights, layers, input_activation, output_activation, use_bias, classify)
+
+        features = np.array([[1, 2], [3, 4]])
+
+        # Classifier without bias: labels and probabilities both come back with shape (2, 1).
+        layers = [2, 2, 1]
+        weight_count = _NNBase._calculate_state_size(layers)
+        unit_weights = np.ones(weight_count)
+        labels, probs = run(unit_weights, layers, False, True)
+        assert labels.shape == (2, 1)
+        assert probs.shape == (2, 1)
+
+        # Classifier with bias: the node list's input size is one larger than the feature count.
+        layers = [3, 2, 1]
+        weight_count = _NNBase._calculate_state_size(layers)
+        unit_weights = np.ones(weight_count)
+        labels, probs = run(unit_weights, layers, True, True)
+        assert labels.shape == (2, 1)
+        assert probs.shape == (2, 1)
+
+        # Regressor (same layers and weights as the bias run): no probabilities are returned.
+        labels, probs = run(unit_weights, layers, True, False)
+        assert labels.shape == (2, 1)
+        assert probs is None
+
+        # Edge case: an empty node_list is rejected with a ValueError.
+        empty_weights = np.array([])
+        with pytest.raises(ValueError, match="node_list cannot be empty."):
+            run(empty_weights, [], True, False)