Broken: refactor all algorithms/ base classes

- Add type hints for all methods/functions
- Refactor file and method/function docstrings
- Improve verbosity and formatting of raise statements
- General code style improvements
knakamura13 · Aug 3, 2024 · cd1ce2a · cd1ce2a
1 parent 751cc31
commit cd1ce2a
Show file tree

Hide file tree

Showing 6 changed files with 282 additions and 245 deletions.
diff --git a/mlrose_hiive/algorithms/ga.py b/mlrose_hiive/algorithms/ga.py
@@ -4,23 +4,74 @@
 # License: BSD 3 clause
 
 import numpy as np
+from typing import Callable, Any
 from mlrose_hiive.decorators import short_name
 
 
-def _get_hamming_distance_default(population, p1):
+def _get_hamming_distance_default(population: np.ndarray, p1: np.ndarray) -> np.ndarray:
+    """
+    Calculate the normalized Hamming distance (fraction of differing positions) between a given individual and each member of the population.
+
+    Parameters
+    ----------
+    population : np.ndarray
+        Population of individuals.
+    p1 : np.ndarray
+        Individual to compare with the population.
+
+    Returns
+    -------
+    np.ndarray
+        Array of Hamming distances.
+    """
     return np.array([np.count_nonzero(p1 != p2) / len(p1) for p2 in population])
 
 
-def _get_hamming_distance_float(population, p1):
+def _get_hamming_distance_float(population: np.ndarray, p1: np.ndarray) -> np.ndarray:
+    """
+    Calculate the per-element absolute differences between a given individual and each member of the population, divided by the individual's length (a float-valued analogue of the Hamming distance).
+
+    Parameters
+    ----------
+    population : np.ndarray
+        Population of individuals.
+    p1 : np.ndarray
+        Individual to compare with the population.
+
+    Returns
+    -------
+    np.ndarray
+        Array of Hamming distances.
+    """
     return np.array([np.abs(p1 - p2) / len(p1) for p2 in population])
 
 
-def _genetic_alg_select_parents(pop_size, problem, get_hamming_distance_func, hamming_factor=0.0):
+def _genetic_alg_select_parents(pop_size: int, problem: Any,
+                                get_hamming_distance_func: Callable[[np.ndarray, np.ndarray], np.ndarray] | None,
+                                hamming_factor: float = 0.0) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Select parents for the next generation in the genetic algorithm.
+
+    Parameters
+    ----------
+    pop_size : int
+        Size of the population.
+    problem : optimization object
+        The optimization problem instance.
+    get_hamming_distance_func : Callable[[np.ndarray, np.ndarray], np.ndarray] | None
+        Function to calculate Hamming distance.
+    hamming_factor : float, default: 0.0
+        Factor to account for Hamming distance in parent selection.
+
+    Returns
+    -------
+    tuple
+        Selected parents (p1, p2) for reproduction.
+    """
     mating_probabilities = problem.get_mate_probs()
 
     if get_hamming_distance_func is not None and hamming_factor > 0.01:
         population = problem.get_population()
-
         selected = np.random.choice(pop_size, p=mating_probabilities)
         p1 = population[selected]
 
@@ -42,105 +93,97 @@ def _genetic_alg_select_parents(pop_size, problem, get_hamming_distance_func, ha
 
 
 @short_name('ga')
-def genetic_alg(problem, pop_size=200, pop_breed_percent=0.75, elite_dreg_ratio=0.99,
-                minimum_elites=0, minimum_dregs=0, mutation_prob=0.1,
-                max_attempts=10, max_iters=np.inf, curve=False, random_state=None,
-                state_fitness_callback=None, callback_user_info=None,
-                hamming_factor=0.0, hamming_decay_factor=None):
-    """Use a standard genetic algorithm to find the optimum for a given optimization problem.
+def genetic_alg(problem: Any,
+                pop_size: int = 200,
+                pop_breed_percent: float = 0.75,
+                elite_dreg_ratio: float = 0.99,
+                minimum_elites: int = 0,
+                minimum_dregs: int = 0,
+                mutation_prob: float = 0.1,
+                max_attempts: int = 10,
+                max_iters: int | float = np.inf,
+                curve: bool = False,
+                random_state: int = None,
+                state_fitness_callback: Callable[..., Any] = None,
+                callback_user_info: Any = None,
+                hamming_factor: float = 0.0,
+                hamming_decay_factor: float = None) -> tuple[np.ndarray, float, np.ndarray | None]:
+    """
+    Use a standard genetic algorithm to find the optimum for a given optimization problem.
+
     Parameters
     ----------
-    problem: optimization object
+    problem : optimization object
         Object containing fitness function optimization problem to be solved.
-        For example, :code:`DiscreteOpt()`, :code:`ContinuousOpt()` or
-    pop_size: int, default: 200
+    pop_size : int, default: 200
         Size of population to be used in genetic algorithm.
-    pop_breed_percent: float, default 0.75
+    pop_breed_percent : float, default: 0.75
         Percentage of population to breed in each iteration.
-        The remainder of the population will be filled from the elite and
-        dregs of the prior generation in a ratio specified by elite_dreg_ratio.
-    elite_dreg_ratio: float, default:0.95
+        The remainder of the population will be filled from the elite and dregs of the prior generation in a ratio specified by elite_dreg_ratio.
+    elite_dreg_ratio : float, default: 0.99
         The ratio of elites:dregs added directly to the next generation.
-        For the default value, 95% of the added population will be elites,
-        5% will be dregs.
-    minimum_elites: int, default: 0
+        For the default value of 0.99, roughly 99% of the added population will be elites and 1% will be dregs.
+    minimum_elites : int, default: 0
         Minimum number of elites to be added to next generation
-    minimum_dregs: int, default: 0
+    minimum_dregs : int, default: 0
         Minimum number of dregs to be added to next generation
-    mutation_prob: float, default: 0.1
-        Probability of a mutation at each element of the state vector
-        during reproduction, expressed as a value between 0 and 1.
-    max_attempts: int, default: 10
+    mutation_prob : float, default: 0.1
+        Probability of a mutation at each element of the state vector during reproduction, expressed as a value between 0 and 1.
+    max_attempts : int, default: 10
         Maximum number of attempts to find a better state at each step.
-    max_iters: int, default: np.inf
+    max_iters : int | float, default: np.inf
         Maximum number of iterations of the algorithm.
-    curve: bool, default: False
+    curve : bool, default: False
         Boolean to keep fitness values for a curve.
         If :code:`False`, then no curve is stored.
-        If :code:`True`, then a history of fitness values is provided as a
-        third return value.
-    random_state: int, default: None
-        If random_state is a positive integer, random_state is the seed used
-        by np.random.seed(); otherwise, the random seed is not set.
-    state_fitness_callback: function taking five parameters, default: None
+        If :code:`True`, then a history of fitness values is provided as a third return value.
+    random_state : int | None, default: None
+        If random_state is a positive integer, random_state is the seed used by np.random.seed(); otherwise, the random seed is not set.
+    state_fitness_callback : Callable[..., Any] | None, default: None
         If specified, this callback will be invoked once per iteration.
         Parameters are (iteration, max attempts reached?, current best state, current best fit, user callback data).
         Return true to continue iterating, or false to stop.
-    callback_user_info: any, default: None
+    callback_user_info : Any, default: None
         User data passed as last parameter of callback.
-    hamming_factor: float, default: 0.0
+    hamming_factor : float, default: 0.0
         Factor to account for Hamming distance in parent selection.
-    hamming_decay_factor: float, default: None
+    hamming_decay_factor : float | None, default: None
         Decay factor for the hamming_factor over iterations.
+
     Returns
     -------
-    best_state: np.ndarray
+    best_state : np.ndarray
         Numpy array containing state that optimizes the fitness function.
-    best_fitness: float
+    best_fitness : float
         Value of fitness function at best state.
-    fitness_curve: np.ndarray
-        Numpy array of arrays containing the fitness of the entire population
-        at every iteration.
+    fitness_curve : np.ndarray | None
+        Numpy array of arrays containing the fitness of the entire population at every iteration.
         Only returned if input argument :code:`curve` is :code:`True`.
+
     References
     ----------
-    Russell, S. and P. Norvig (2010). *Artificial Intelligence: A Modern
-    Approach*, 3rd edition. Prentice Hall, New Jersey, USA.
+    Russell, S. and P. Norvig (2010). *Artificial Intelligence: A Modern Approach*, 3rd edition.
+    Prentice Hall, New Jersey, USA.
     """
-    if pop_size < 0:
-        raise Exception("""pop_size must be a positive integer.""")
-    elif not isinstance(pop_size, int):
-        if pop_size.is_integer():
-            pop_size = int(pop_size)
-        else:
-            raise Exception("""pop_size must be a positive integer.""")
-
-    breeding_pop_size = int(pop_size * pop_breed_percent) - (minimum_elites + minimum_dregs)
-    if breeding_pop_size < 1:
-        raise Exception("""pop_breed_percent must be large enough to ensure at least one mating.""")
-
-    if pop_breed_percent > 1:
-        raise Exception("""pop_breed_percent must be less than 1.""")
-
-    if elite_dreg_ratio < 0 or elite_dreg_ratio > 1:
-        raise Exception("""elite_dreg_ratio must be between 0 and 1.""")
-
-    if mutation_prob < 0 or mutation_prob > 1:
-        raise Exception("""mutation_prob must be between 0 and 1.""")
-
-    if (not isinstance(max_attempts, int) and not max_attempts.is_integer()) or max_attempts < 0:
-        raise Exception("""max_attempts must be a positive integer.""")
-
-    if (not isinstance(max_iters, int) and max_iters != np.inf and not max_iters.is_integer()) or max_iters < 0:
-        raise Exception("""max_iters must be a positive integer.""")
+    if not isinstance(pop_size, int) or pop_size < 0:
+        raise ValueError(f"pop_size must be a positive integer. Got {pop_size}")
+    if not 0 <= pop_breed_percent <= 1:
+        raise ValueError(f"pop_breed_percent must be between 0 and 1. Got {pop_breed_percent}")
+    if not 0 <= elite_dreg_ratio <= 1:
+        raise ValueError(f"elite_dreg_ratio must be between 0 and 1. Got {elite_dreg_ratio}")
+    if not 0 <= mutation_prob <= 1:
+        raise ValueError(f"mutation_prob must be between 0 and 1. Got {mutation_prob}")
+    if not isinstance(max_attempts, int) or max_attempts < 0:
+        raise ValueError(f"max_attempts must be a positive integer. Got {max_attempts}")
+    if not (isinstance(max_iters, int) or max_iters == np.inf) or max_iters < 0:
+        raise ValueError(f"max_iters must be a positive integer or np.inf. Got {max_iters}")
 
     # Set random seed
     if isinstance(random_state, int) and random_state > 0:
         np.random.seed(random_state)
 
+    # Initialize problem
     fitness_curve = []
-
-    # Initialize problem, population and attempts counter
     problem.reset()
     problem.random_pop(pop_size)
     if state_fitness_callback is not None:
@@ -151,25 +194,25 @@ def genetic_alg(problem, pop_size=200, pop_breed_percent=0.75, elite_dreg_ratio=
                                fitness_evaluations=problem.fitness_evaluations,
                                user_data=callback_user_info)
 
-    get_hamming_distance_func = None
+    get_hamming_distance_func: Callable[[np.ndarray, np.ndarray], np.ndarray] | None = None
     if hamming_factor > 0:
         g1 = problem.get_population()[0][0]
         if isinstance(g1, float) or g1.dtype == 'float64':
             get_hamming_distance_func = _get_hamming_distance_float
         else:
             get_hamming_distance_func = _get_hamming_distance_default
 
-    attempts = 0
-    iters = 0
-
     # initialize survivor count, elite count and dreg count
+    breeding_pop_size = int(pop_size * pop_breed_percent) - (minimum_elites + minimum_dregs)
     survivors_size = pop_size - breeding_pop_size
     dregs_size = max(int(survivors_size * (1.0 - elite_dreg_ratio)) if survivors_size > 1 else 0, minimum_dregs)
     elites_size = max(survivors_size - dregs_size, minimum_elites)
     if dregs_size + elites_size > survivors_size:
         over_population = dregs_size + elites_size - survivors_size
         breeding_pop_size -= over_population
 
+    attempts = 0
+    iters = 0
     continue_iterating = True
     while (attempts < max_attempts) and (iters < max_iters):
         iters += 1