Refactor all fitness functions (in progress)
 - Add type hints for all methods/functions
 - Refactor file and method/function docstrings
 - General code style improvements
 - Vectorize suboptimal code
knakamura13 committed Aug 5, 2024
1 parent a228184 commit b634074
Showing 27 changed files with 338 additions and 317 deletions.
2 changes: 1 addition & 1 deletion docs/source/intro.rst
@@ -7,7 +7,7 @@ Project Background
------------------
mlrose was initially developed to support students of Georgia Tech's OMSCS/OMSA offering of CS 7641: Machine Learning.

It includes implementations of all randomized optimization algorithms taught in this course, as well as functionality to apply these algorithms to integer-string optimization problems, such as N-Queens and the Knapsack problem; continuous-valued optimization problems, such as the neural network weight problem; and tour optimization problems, such as the Travelling Salesperson problem. It also has the flexibility to solve user-defined optimization problems.

At the time of development, there did not exist a single Python package that collected all of this functionality together in the one location.

2 changes: 1 addition & 1 deletion docs/source/tutorial2.rst
@@ -159,6 +159,6 @@ This solution is illustrated below and can be shown to be the optimal solution t

Summary
-------
In this tutorial we introduced the travelling salesperson problem, and discussed how mlrose can be used to efficiently solve this problem. This is an example of how mlrose caters to solving one very specific type of optimization problem.

Another very specific type of optimization problem mlrose caters to solving is the machine learning weight optimization problem. That is, the problem of finding the optimal weights for machine learning models such as neural networks and regression models. We will discuss how mlrose can be used to solve this problem next, in our third and final tutorial.
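
As a concrete illustration of the tutorial's claim, the sketch below sets up a small travelling-salesperson instance against the mlrose_hiive API. `TravellingSalesperson` follows the rename introduced in this commit; `TSPOpt`, `genetic_alg`, and the `coords` keyword are assumed to keep their current mlrose_hiive names and may still change while this refactor is in progress.

```python
# Hedged sketch: a small TSP solved with mlrose_hiive (names assumed as noted above).
import mlrose_hiive as mlrose

# (x, y) coordinates of the cities to visit
coords = [(1, 1), (4, 2), (5, 2), (6, 4), (4, 4), (3, 6), (1, 5), (2, 3)]

# Fitness = total tour length; TravellingSalesperson is the renamed TravellingSales
fitness = mlrose.TravellingSalesperson(coords=coords)

# Minimize tour length over permutations of the eight cities
problem = mlrose.TSPOpt(length=len(coords), fitness_fn=fitness, maximize=False)

best_state, best_fitness, _ = mlrose.genetic_alg(problem, mutation_prob=0.2,
                                                 max_attempts=100, random_state=2)
print(best_state, best_fitness)
```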
2 changes: 1 addition & 1 deletion mlrose_hiive/__init__.py
@@ -13,7 +13,7 @@
from .algorithms.crossovers import OnePointCrossover, UniformCrossover, TSPCrossover
from .algorithms.mutators import SingleGeneMutator, DiscreteGeneMutator, GeneSwapMutator, SingleShiftMutator

from .fitness import OneMax, FlipFlop, FourPeaks, SixPeaks, ContinuousPeaks, Knapsack, TravellingSales, Queens, MaxKColor, CustomFitness
from .fitness import OneMax, FlipFlop, FourPeaks, SixPeaks, ContinuousPeaks, Knapsack, TravellingSalesperson, Queens, MaxKColor, CustomFitness

from .neural import NeuralNetwork, LinearRegression, LogisticRegression, NNClassifier, nn_core
from .neural.activation import identity, relu, leaky_relu, sigmoid, softmax, tanh
2 changes: 1 addition & 1 deletion mlrose_hiive/algorithms/crossovers/tsp_crossover.py
@@ -16,7 +16,7 @@

class TSPCrossover(_CrossoverBase):
"""
Crossover operation tailored for the Traveling Salesman Problem (TSP) in genetic algorithms.
Crossover operation tailored for the Travelling Salesperson Problem (TSP) in genetic algorithms.
Implements specific crossover techniques that ensure valid TSP routes in the offspring.
The crossover handles distinct city sequences without repetitions and uses specialized
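
The docstring above promises child tours with no repeated cities. Purely as an illustration of that idea, here is a generic order-based (OX1-style) crossover sketch; it is not necessarily the exact technique `TSPCrossover` implements, and `ordered_crossover` is a hypothetical helper, not part of mlrose_hiive.

```python
# Generic order-based crossover for permutation-encoded tours (illustration only).
# The child copies a slice from parent 1, then fills the remaining positions with
# parent 2's cities in their original order, so no city is repeated.
import numpy as np


def ordered_crossover(parent1: np.ndarray, parent2: np.ndarray,
                      rng: np.random.Generator) -> np.ndarray:
    n = len(parent1)
    start, end = sorted(rng.choice(n, size=2, replace=False))

    child = np.full(n, -1, dtype=int)
    child[start:end + 1] = parent1[start:end + 1]  # inherit a slice from parent 1

    # Fill the gaps with parent 2's cities, preserving their relative order
    remaining = [city for city in parent2 if city not in child[start:end + 1]]
    gaps = [i for i in range(n) if child[i] == -1]
    for position, city in zip(gaps, remaining):
        child[position] = city
    return child


rng = np.random.default_rng(0)
p1 = np.array([0, 1, 2, 3, 4, 5, 6, 7])
p2 = np.array([3, 7, 0, 6, 4, 1, 5, 2])
child = ordered_crossover(p1, p2, rng)
assert sorted(child) == list(range(8))  # every city appears exactly once
print(child)
```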
2 changes: 1 addition & 1 deletion mlrose_hiive/algorithms/mimic.py
@@ -72,7 +72,7 @@ def mimic(problem: Any,
De Bonet, J., C. Isbell, and P. Viola (1997). MIMIC: Finding Optima by Estimating Probability Densities.
In *Advances in Neural Information Processing Systems* (NIPS) 9, pp. 424–430.
"""
if problem.get_prob_type() == 'continuous':
if problem.get_problem_type() == 'continuous':
raise ValueError("problem type must be discrete or tsp.")
if not isinstance(pop_size, int) or pop_size < 0:
raise ValueError(f"pop_size must be a positive integer. Got {pop_size}")
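
For context on the validation above, here is a minimal, hedged sketch of calling `mimic` on a discrete problem. `DiscreteOpt`, `OneMax`, and the `mimic` signature are assumed from the existing mlrose_hiive API; since the `get_prob_type` → `get_problem_type` rename is still propagating in this commit, the exact names may shift.

```python
# Hedged sketch: mimic() expects a discrete (or TSP) problem and a positive
# integer pop_size, per the checks shown above. API names assumed as noted.
import mlrose_hiive as mlrose

problem = mlrose.DiscreteOpt(length=20, fitness_fn=mlrose.OneMax(),
                             maximize=True, max_val=2)

best_state, best_fitness, _ = mlrose.mimic(problem, pop_size=200, keep_pct=0.2,
                                           max_attempts=10, random_state=1)
print(best_fitness)

# A continuous problem, or a non-integer / negative pop_size, raises ValueError.
```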
19 changes: 14 additions & 5 deletions mlrose_hiive/fitness/__init__.py
@@ -1,12 +1,21 @@
"""Classes for defining fitness functions (i.e., optimization problems) for optimization algorithms."""

from .continuous_peaks import ContinuousPeaks

from .flip_flop import FlipFlop

from .four_peaks import FourPeaks
from .six_peaks import SixPeaks
from .continuous_peaks import ContinuousPeaks
from .one_max import OneMax
from .max_k_color import MaxKColor

from .knapsack import Knapsack

from .max_k_color import MaxKColor

from .one_max import OneMax

from .queens import Queens
from .travelling_sales import TravellingSales

from .six_peaks import SixPeaks

from .travelling_salesperson import TravellingSalesperson

from .custom_fitness import CustomFitness
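
A quick sketch of the re-exported surface above, assuming the names stay as listed in this `__init__.py`:

```python
# The fitness classes are importable from mlrose_hiive.fitness (and, per the
# top-level __init__.py shown earlier, from mlrose_hiive itself).
import numpy as np
from mlrose_hiive.fitness import OneMax, FlipFlop

state = np.array([1, 1, 0, 1, 0, 1])
print(OneMax().evaluate(state))    # number of 1s in the bit string: 4
print(FlipFlop().evaluate(state))  # number of neighbouring pairs that differ: 4
```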
mlrose_hiive/fitness/{discrete_peaks_base.py → _discrete_peaks_base.py}
@@ -1,10 +1,10 @@
"""Classes for defining fitness functions."""
"""Class defining the Discrete Peaks base fitness function for use with the Four Peaks, Six Peaks, and Custom fitness functions."""

# Authors: Genevieve Hayes (modified by Andrew Rollings, Kyle Nakamura)
# License: BSD 3 clause


class DiscretePeaksBase:
class _DiscretePeaksBase:

@staticmethod
def head(_b, _x):
153 changes: 76 additions & 77 deletions mlrose_hiive/fitness/continuous_peaks.py
@@ -1,4 +1,4 @@
"""Classes for defining fitness functions."""
"""Class defining the Continuous Peaks fitness function for use with optimization algorithms."""

# Authors: Genevieve Hayes (modified by Andrew Rollings, Kyle Nakamura)
# License: BSD 3 clause
@@ -7,126 +7,125 @@


class ContinuousPeaks:
"""Fitness function for Continuous Peaks optimization problem. Evaluates
the fitness of an n-dimensional state vector :math:`x`, given parameter T,
as:
.. math::
Fitness(x, T) = \\max(max\\_run(0, x), max\\_run(1, x)) + R(x, T)
where:
* :math:`max\\_run(b, x)` is the length of the maximum run of b's
in :math:`x`;
* :math:`R(x, T) = n`, if (:math:`max\\_run(0, x) > T` and
:math:`max\\_run(1, x) > T`); and
* :math:`R(x, T) = 0`, otherwise.
"""
Fitness function for Continuous Peaks optimization problem. Evaluates the fitness
of an n-dimensional state vector `x`, given parameter T.
Parameters
----------
t_pct: float, default: 0.1
Threshold parameter (T) for Continuous Peaks fitness function,
expressed as a percentage of the state space dimension, n (i.e.
:math:`T = t_{pct} \\times n`).
threshold_percentage : float, default=0.1
Threshold parameter (T) for Continuous Peaks fitness function, expressed as a
percentage of the state space dimension, n (i.e., `T = threshold_percentage * n`).
Attributes
----------
threshold_percentage : float
The threshold percentage for the fitness function.
problem_type : str
Specifies problem type as 'discrete'.
Examples
-------
>>> import mlrose_hiive
--------
>>> import numpy as np
>>> fitness = mlrose_hiive.ContinuousPeaks(t_pct=0.15)
>>> fitness = ContinuousPeaks(threshold_percentage=0.15)
>>> state = np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1])
>>> fitness.evaluate(state)
17
Note
----
The Continuous Peaks fitness function is suitable for use in bit-string
(discrete-state with :code:`max_val = 2`) optimization problems *only*.
The Continuous Peaks fitness function is suitable for use in bit-string (discrete-state
with `max_val = 2`) optimization problems only.
"""

def __init__(self, t_pct=0.1):
def __init__(self, threshold_percentage: float = 0.1) -> None:
self.threshold_percentage: float = threshold_percentage
self.problem_type: str = 'discrete'

self.t_pct = t_pct
self.prob_type = 'discrete'
if not (0 <= self.threshold_percentage <= 1):
raise ValueError(f"threshold_percentage must be between 0 and 1, got {self.threshold_percentage} instead.")

if (self.t_pct < 0) or (self.t_pct > 1):
raise Exception("""t_pct must be between 0 and 1.""")

def evaluate(self, state):
"""Evaluate the fitness of a state vector.
def evaluate(self, state: np.ndarray) -> float:
"""
Evaluate the fitness of a state vector.
Parameters
----------
state: np.ndarray
state : np.ndarray
State array for evaluation.
Returns
-------
fitness: float
Value of fitness function.
float
Value of the fitness function.
"""
_n = len(state)
_t = np.ceil(self.t_pct*_n)
num_elements = len(state)
threshold = int(np.ceil(self.threshold_percentage * num_elements))

# Calculate length of maximum runs of 0's and 1's
max_0 = self.max_run(0, state)
max_1 = self.max_run(1, state)
max_zeros = self._max_run(0, state)
max_ones = self._max_run(1, state)

# Calculate R(X, T)
if max_0 > _t and max_1 > _t:
_r = _n
else:
_r = 0

# Evaluate function
fitness = max(max_0, max_1) + _r
reward = num_elements if max_zeros > threshold and max_ones > threshold else 0

fitness = max(max_zeros, max_ones) + reward
return fitness

def get_prob_type(self):
""" Return the problem type.
def get_problem_type(self) -> str:
"""
Return the problem type.
Returns
-------
self.prob_type: string
Specifies problem type as 'discrete', 'continuous', 'tsp'
or 'either'.
str
Specifies problem type as 'discrete'.
"""
return self.prob_type
return self.problem_type

@staticmethod
def max_run(_b, _x):
"""Determine the length of the maximum run of b's in vector x.
def _max_run(value: int, vector: np.ndarray) -> int:
"""
Determine the length of the maximum run of a given value in a vector.
Parameters
----------
_b: int
Integer for counting.
_x: np.ndarray
value : int
Value to count.
vector : np.ndarray
Vector of integers.
Returns
-------
max: int
Length of maximum run of b's.
int
Length of the maximum run of the given value.
"""
# Initialize counter
_max = 0
run = 0
# Create a boolean array where each element is True if it equals the given value
is_value = np.array(vector == value)

# If the value does not exist in the vector, return 0
if not np.any(is_value):
return 0

# Calculate the differences between consecutive elements in the boolean array
diffs = np.diff(is_value.astype(int))

# Find the indices where the value starts and ends
run_starts = np.where(diffs == 1)[0] + 1
run_ends = np.where(diffs == -1)[0] + 1

# If the run starts at the beginning of the vector, include the first index
if is_value[0]:
run_starts = np.insert(run_starts, 0, 0)

# Iterate through values in vector
for i in _x:
if i == _b:
run += 1
else:
if run > _max:
_max = run
# If the run ends at the end of the vector, include the last index
if is_value[-1]:
run_ends = np.append(run_ends, len(vector))

run = 0
# Ensure that run_ends has the same length as run_starts
if len(run_starts) > len(run_ends):
run_ends = np.append(run_ends, len(vector))

if (_x[-1] == _b) and (run > _max):
_max = run
# Calculate the lengths of the runs
run_lengths = run_ends - run_starts

return _max
# Return the maximum run length, or 0 if no runs are found
return run_lengths.max() if run_lengths.size > 0 else 0
8 changes: 4 additions & 4 deletions mlrose_hiive/fitness/custom_fitness.py
@@ -1,4 +1,4 @@
"""Classes for defining fitness functions."""
"""Class defining a customizable fitness function for use with optimization algorithms."""

# Authors: Genevieve Hayes (modified by Andrew Rollings, Kyle Nakamura)
# License: BSD 3 clause
@@ -15,7 +15,7 @@ class CustomFitness:
Function for calculating fitness of a state with the signature
:code:`fitness_fn(state, **kwargs)`.
problem_type: string, default: 'either'
problem_type: str, default: 'either'
Specifies problem type as 'discrete', 'continuous', 'tsp' or 'either'
(denoting either discrete or continuous).
@@ -58,12 +58,12 @@ def evaluate(self, state):
fitness = self.fitness_fn(state, **self.kwargs)
return fitness

def get_prob_type(self):
def get_problem_type(self):
""" Return the problem type.
Returns
-------
self.prob_type: string
self.prob_type: str
Specifies problem type as 'discrete', 'continuous', 'tsp'
or 'either'.
"""
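
A brief, hedged example of the `fitness_fn(state, **kwargs)` contract described above. The constructor arguments (`fitness_fn`, `problem_type`, extra keyword arguments) are taken from the docstring fragments in this diff; `weighted_ones` is a hypothetical user function, not part of the library.

```python
# Hedged sketch: wrapping a user-defined function with CustomFitness.
import numpy as np
import mlrose_hiive as mlrose


def weighted_ones(state, weights=None):
    """Hypothetical fitness: weighted count of 1s in a bit string."""
    weights = np.ones(len(state)) if weights is None else np.asarray(weights)
    return float(np.sum(weights * state))


fitness = mlrose.CustomFitness(weighted_ones, problem_type='discrete',
                               weights=[1, 2, 3, 4])
print(fitness.evaluate(np.array([1, 0, 1, 1])))  # 1*1 + 3*1 + 4*1 = 8.0
print(fitness.get_problem_type())                # 'discrete' (renamed accessor)
```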
6 changes: 3 additions & 3 deletions mlrose_hiive/fitness/flip_flop.py
@@ -1,4 +1,4 @@
"""Classes for defining fitness functions."""
"""Class defining the Flip Flop fitness function for use with optimization algorithms."""

# Authors: Genevieve Hayes (modified by Andrew Rollings, Kyle Nakamura)
# License: BSD 3 clause
@@ -76,12 +76,12 @@ def evaluate_many(states):

return fitness

def get_prob_type(self):
def get_problem_type(self):
""" Return the problem type.
Returns
-------
self.prob_type: string
self.prob_type: str
Specifies problem type as 'discrete', 'continuous', 'tsp'
or 'either'.
"""
10 changes: 5 additions & 5 deletions mlrose_hiive/fitness/four_peaks.py
@@ -1,14 +1,14 @@
"""Classes for defining fitness functions."""
"""Class defining the Four Peaks fitness function for use with optimization algorithms."""

# Authors: Genevieve Hayes (modified by Andrew Rollings, Kyle Nakamura)
# License: BSD 3 clause

import numpy as np

from mlrose_hiive.fitness.discrete_peaks_base import DiscretePeaksBase
from mlrose_hiive.fitness._discrete_peaks_base import _DiscretePeaksBase


class FourPeaks(DiscretePeaksBase):
class FourPeaks(_DiscretePeaksBase):
"""Fitness function for Four Peaks optimization problem. Evaluates the
fitness of an n-dimensional state vector :math:`x`, given parameter T, as:
@@ -91,12 +91,12 @@ def evaluate(self, state):

return fitness

def get_prob_type(self):
def get_problem_type(self):
""" Return the problem type.
Returns
-------
self.prob_type: string
self.prob_type: str
Specifies problem type as 'discrete', 'continuous', 'tsp'
or 'either'.
"""
6 changes: 3 additions & 3 deletions mlrose_hiive/fitness/knapsack.py
@@ -1,4 +1,4 @@
"""Classes for defining fitness functions."""
"""Class defining the Knapsack fitness function for use with optimization algorithms."""

# Authors: Genevieve Hayes (modified by Andrew Rollings, Kyle Nakamura)
# License: BSD 3 clause
@@ -107,12 +107,12 @@ def evaluate(self, state):

return fitness

def get_prob_type(self):
def get_problem_type(self):
""" Return the problem type.
Returns
-------
self.prob_type: string
self.prob_type: str
Specifies problem type as 'discrete', 'continuous', 'tsp'
or 'either'.
"""