Skip to content

Commit

Permalink
Broken: refactor all algorithms/ base classes
Browse files Browse the repository at this point in the history
- Add type hints for all methods/functions
- Refactor file and method/function docstrings
- Improve verbosity and formatting of raise statements
- General code style improvements
  • Loading branch information
knakamura13 committed Aug 3, 2024
1 parent 751cc31 commit cd1ce2a
Show file tree
Hide file tree
Showing 6 changed files with 282 additions and 245 deletions.
193 changes: 118 additions & 75 deletions mlrose_hiive/algorithms/ga.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,74 @@
# License: BSD 3 clause

import numpy as np
from typing import Callable, Any
from mlrose_hiive.decorators import short_name


def _get_hamming_distance_default(population: np.ndarray, p1: np.ndarray) -> np.ndarray:
    """
    Compute the normalized Hamming distance from ``p1`` to each member of ``population``.

    Parameters
    ----------
    population : np.ndarray
        Individuals to compare against; iterated one member at a time.
    p1 : np.ndarray
        Reference individual.

    Returns
    -------
    np.ndarray
        One value per member of ``population``: the number of positions at which
        that member differs from ``p1``, divided by ``len(p1)``.
    """
    distances = []
    for candidate in population:
        # Count the positions where the two individuals disagree.
        mismatch_count = np.count_nonzero(p1 != candidate)
        distances.append(mismatch_count / len(p1))
    return np.array(distances)


def _get_hamming_distance_float(population: np.ndarray, p1: np.ndarray) -> np.ndarray:
    """
    Compute a Hamming-style distance for real-valued individuals.

    For each member of ``population`` the distance to ``p1`` is the sum of the
    absolute element-wise differences divided by ``len(p1)``, i.e. the mean
    absolute difference.

    Parameters
    ----------
    population : np.ndarray
        Individuals to compare against; iterated one member at a time.
    p1 : np.ndarray
        Reference individual.

    Returns
    -------
    np.ndarray
        One-dimensional array holding one distance per member of ``population``.
    """
    # Reduce each element-wise difference vector to a single scalar so the result
    # has one entry per individual, matching `_get_hamming_distance_default`.
    # Without the sum the result is a 2-D (individual x element) matrix.
    return np.array([np.sum(np.abs(p1 - p2)) / len(p1) for p2 in population])


def _genetic_alg_select_parents(pop_size, problem, get_hamming_distance_func, hamming_factor=0.0):
def _genetic_alg_select_parents(pop_size: int, problem: Any,
get_hamming_distance_func: Callable[[np.ndarray, np.ndarray], np.ndarray] | None,
hamming_factor: float = 0.0) -> tuple[np.ndarray, np.ndarray]:
"""
Select parents for the next generation in the genetic algorithm.
Parameters
----------
pop_size : int
Size of the population.
problem : optimization object
The optimization problem instance.
get_hamming_distance_func : Callable[[np.ndarray, np.ndarray], np.ndarray] | None
Function to calculate Hamming distance.
hamming_factor : float, default: 0.0
Factor to account for Hamming distance in parent selection.
Returns
-------
tuple
Selected parents (p1, p2) for reproduction.
"""
mating_probabilities = problem.get_mate_probs()

if get_hamming_distance_func is not None and hamming_factor > 0.01:
population = problem.get_population()

selected = np.random.choice(pop_size, p=mating_probabilities)
p1 = population[selected]

Expand All @@ -42,105 +93,97 @@ def _genetic_alg_select_parents(pop_size, problem, get_hamming_distance_func, ha


@short_name('ga')
def genetic_alg(problem, pop_size=200, pop_breed_percent=0.75, elite_dreg_ratio=0.99,
minimum_elites=0, minimum_dregs=0, mutation_prob=0.1,
max_attempts=10, max_iters=np.inf, curve=False, random_state=None,
state_fitness_callback=None, callback_user_info=None,
hamming_factor=0.0, hamming_decay_factor=None):
"""Use a standard genetic algorithm to find the optimum for a given optimization problem.
def genetic_alg(problem: Any,
pop_size: int = 200,
pop_breed_percent: float = 0.75,
elite_dreg_ratio: float = 0.99,
minimum_elites: int = 0,
minimum_dregs: int = 0,
mutation_prob: float = 0.1,
max_attempts: int = 10,
max_iters: int | float = np.inf,
curve: bool = False,
random_state: int = None,
state_fitness_callback: Callable[..., Any] = None,
callback_user_info: Any = None,
hamming_factor: float = 0.0,
hamming_decay_factor: float = None) -> tuple[np.ndarray, float, np.ndarray | None]:
"""
Use a standard genetic algorithm to find the optimum for a given optimization problem.
Parameters
----------
problem: optimization object
problem : optimization object
Object containing fitness function optimization problem to be solved.
For example, :code:`DiscreteOpt()`, :code:`ContinuousOpt()` or :code:`TSPOpt()`.
pop_size: int, default: 200
pop_size : int, default: 200
Size of population to be used in genetic algorithm.
pop_breed_percent: float, default 0.75
pop_breed_percent : float, default: 0.75
Percentage of population to breed in each iteration.
The remainder of the population will be filled from the elite and
dregs of the prior generation in a ratio specified by elite_dreg_ratio.
elite_dreg_ratio: float, default:0.95
The remainder of the population will be filled from the elite and dregs of the prior generation in a ratio specified by elite_dreg_ratio.
elite_dreg_ratio : float, default: 0.99
The ratio of elites:dregs added directly to the next generation.
For the default value, 95% of the added population will be elites,
5% will be dregs.
minimum_elites: int, default: 0
For the default value, 99% of the added population will be elites, 1% will be dregs.
minimum_elites : int, default: 0
Minimum number of elites to be added to next generation
minimum_dregs: int, default: 0
minimum_dregs : int, default: 0
Minimum number of dregs to be added to next generation
mutation_prob: float, default: 0.1
Probability of a mutation at each element of the state vector
during reproduction, expressed as a value between 0 and 1.
max_attempts: int, default: 10
mutation_prob : float, default: 0.1
Probability of a mutation at each element of the state vector during reproduction, expressed as a value between 0 and 1.
max_attempts : int, default: 10
Maximum number of attempts to find a better state at each step.
max_iters: int, default: np.inf
max_iters : int | float, default: np.inf
Maximum number of iterations of the algorithm.
curve: bool, default: False
curve : bool, default: False
Boolean to keep fitness values for a curve.
If :code:`False`, then no curve is stored.
If :code:`True`, then a history of fitness values is provided as a
third return value.
random_state: int, default: None
If random_state is a positive integer, random_state is the seed used
by np.random.seed(); otherwise, the random seed is not set.
state_fitness_callback: function taking five parameters, default: None
If :code:`True`, then a history of fitness values is provided as a third return value.
random_state : int | None, default: None
If random_state is a positive integer, random_state is the seed used by np.random.seed(); otherwise, the random seed is not set.
state_fitness_callback : Callable[..., Any] | None, default: None
If specified, this callback will be invoked once per iteration.
Parameters are (iteration, max attempts reached?, current best state, current best fit, user callback data).
Return true to continue iterating, or false to stop.
callback_user_info: any, default: None
callback_user_info : Any, default: None
User data passed as last parameter of callback.
hamming_factor: float, default: 0.0
hamming_factor : float, default: 0.0
Factor to account for Hamming distance in parent selection.
hamming_decay_factor: float, default: None
hamming_decay_factor : float | None, default: None
Decay factor for the hamming_factor over iterations.
Returns
-------
best_state: np.ndarray
best_state : np.ndarray
Numpy array containing state that optimizes the fitness function.
best_fitness: float
best_fitness : float
Value of fitness function at best state.
fitness_curve: np.ndarray
Numpy array of arrays containing the fitness of the entire population
at every iteration.
fitness_curve : np.ndarray | None
Numpy array of arrays containing the fitness of the entire population at every iteration.
Only returned if input argument :code:`curve` is :code:`True`.
References
----------
Russell, S. and P. Norvig (2010). *Artificial Intelligence: A Modern
Approach*, 3rd edition. Prentice Hall, New Jersey, USA.
Russell, S. and P. Norvig (2010). *Artificial Intelligence: A Modern Approach*, 3rd edition.
Prentice Hall, New Jersey, USA.
"""
if pop_size < 0:
raise Exception("""pop_size must be a positive integer.""")
elif not isinstance(pop_size, int):
if pop_size.is_integer():
pop_size = int(pop_size)
else:
raise Exception("""pop_size must be a positive integer.""")

breeding_pop_size = int(pop_size * pop_breed_percent) - (minimum_elites + minimum_dregs)
if breeding_pop_size < 1:
raise Exception("""pop_breed_percent must be large enough to ensure at least one mating.""")

if pop_breed_percent > 1:
raise Exception("""pop_breed_percent must be less than 1.""")

if elite_dreg_ratio < 0 or elite_dreg_ratio > 1:
raise Exception("""elite_dreg_ratio must be between 0 and 1.""")

if mutation_prob < 0 or mutation_prob > 1:
raise Exception("""mutation_prob must be between 0 and 1.""")

if (not isinstance(max_attempts, int) and not max_attempts.is_integer()) or max_attempts < 0:
raise Exception("""max_attempts must be a positive integer.""")

if (not isinstance(max_iters, int) and max_iters != np.inf and not max_iters.is_integer()) or max_iters < 0:
raise Exception("""max_iters must be a positive integer.""")
if not isinstance(pop_size, int) or pop_size < 0:
raise ValueError(f"pop_size must be a positive integer. Got {pop_size}")
if not 0 <= pop_breed_percent <= 1:
raise ValueError(f"pop_breed_percent must be between 0 and 1. Got {pop_breed_percent}")
if not 0 <= elite_dreg_ratio <= 1:
raise ValueError(f"elite_dreg_ratio must be between 0 and 1. Got {elite_dreg_ratio}")
if not 0 <= mutation_prob <= 1:
raise ValueError(f"mutation_prob must be between 0 and 1. Got {mutation_prob}")
if not isinstance(max_attempts, int) or max_attempts < 0:
raise ValueError(f"max_attempts must be a positive integer. Got {max_attempts}")
if not (isinstance(max_iters, int) or max_iters == np.inf) or max_iters < 0:
raise ValueError(f"max_iters must be a positive integer or np.inf. Got {max_iters}")

# Set random seed
if isinstance(random_state, int) and random_state > 0:
np.random.seed(random_state)

# Initialize problem
fitness_curve = []

# Initialize problem, population and attempts counter
problem.reset()
problem.random_pop(pop_size)
if state_fitness_callback is not None:
Expand All @@ -151,25 +194,25 @@ def genetic_alg(problem, pop_size=200, pop_breed_percent=0.75, elite_dreg_ratio=
fitness_evaluations=problem.fitness_evaluations,
user_data=callback_user_info)

get_hamming_distance_func = None
get_hamming_distance_func: Callable[[np.ndarray, np.ndarray], np.ndarray] | None = None
if hamming_factor > 0:
g1 = problem.get_population()[0][0]
if isinstance(g1, float) or g1.dtype == 'float64':
get_hamming_distance_func = _get_hamming_distance_float
else:
get_hamming_distance_func = _get_hamming_distance_default

attempts = 0
iters = 0

# initialize survivor count, elite count and dreg count
breeding_pop_size = int(pop_size * pop_breed_percent) - (minimum_elites + minimum_dregs)
survivors_size = pop_size - breeding_pop_size
dregs_size = max(int(survivors_size * (1.0 - elite_dreg_ratio)) if survivors_size > 1 else 0, minimum_dregs)
elites_size = max(survivors_size - dregs_size, minimum_elites)
if dregs_size + elites_size > survivors_size:
over_population = dregs_size + elites_size - survivors_size
breeding_pop_size -= over_population

attempts = 0
iters = 0
continue_iterating = True
while (attempts < max_attempts) and (iters < max_iters):
iters += 1
Expand Down
Loading

0 comments on commit cd1ce2a

Please sign in to comment.