diff --git a/mlrose_hiive/algorithms/crossovers/tsp_crossover.py b/mlrose_hiive/algorithms/crossovers/tsp_crossover.py index 3ead6f54..b6112ea5 100644 --- a/mlrose_hiive/algorithms/crossovers/tsp_crossover.py +++ b/mlrose_hiive/algorithms/crossovers/tsp_crossover.py @@ -17,12 +17,6 @@ def __init__(self, opt_prob): def mate(self, p1, p2): return self._mate_fill(p1, p2) - """ - if np.random.randint(2) == 0: - return self._mate_fill(p1, p2) - else: - return self._mate_traverse(p1, p2) - """ def _mate_fill(self, p1, p2): if self._length > 1: diff --git a/mlrose_hiive/algorithms/rhc.py b/mlrose_hiive/algorithms/rhc.py index 4251dbd4..bf542f7d 100644 --- a/mlrose_hiive/algorithms/rhc.py +++ b/mlrose_hiive/algorithms/rhc.py @@ -58,16 +58,13 @@ def random_hill_climb(problem, max_attempts=10, max_iters=np.inf, restarts=0, Brownlee, J (2011). *Clever Algorithms: Nature-Inspired Programming Recipes*. ``_. """ - if (not isinstance(max_attempts, int) and not max_attempts.is_integer()) \ - or (max_attempts < 0): + if (not isinstance(max_attempts, int) and not max_attempts.is_integer()) or max_attempts < 0: raise Exception("""max_attempts must be a positive integer.""") - if (not isinstance(max_iters, int) and max_iters != np.inf - and not max_iters.is_integer()) or (max_iters < 0): + if (not isinstance(max_iters, int) and max_iters != np.inf and not max_iters.is_integer()) or max_iters < 0: raise Exception("""max_iters must be a positive integer.""") - if (not isinstance(restarts, int) and not restarts.is_integer()) \ - or (restarts < 0): + if (not isinstance(restarts, int) and not restarts.is_integer()) or restarts < 0: raise Exception("""restarts must be a positive integer.""") if init_state is not None and len(init_state) != problem.get_length(): @@ -83,8 +80,6 @@ def random_hill_climb(problem, max_attempts=10, max_iters=np.inf, restarts=0, best_fitness_curve = [] all_curves = [] - continue_iterating = True - # problem.reset() for current_restart in range(restarts + 1): # Initialize optimization problem and attempts counter fevals = problem.fitness_evaluations @@ -160,6 +155,7 @@ def random_hill_climb(problem, max_attempts=10, max_iters=np.inf, restarts=0, # break out if we can stop if problem.can_stop(): break + best_fitness *= problem.get_maximize() return best_state, best_fitness, np.asarray(best_fitness_curve) if curve else None diff --git a/mlrose_hiive/algorithms/sa.py b/mlrose_hiive/algorithms/sa.py index f2bb6ce0..c821ada1 100644 --- a/mlrose_hiive/algorithms/sa.py +++ b/mlrose_hiive/algorithms/sa.py @@ -11,9 +11,8 @@ @short_name('sa') -def simulated_annealing(problem, schedule=GeomDecay(), max_attempts=10, - max_iters=np.inf, init_state=None, curve=False, - fevals=False, random_state=None, +def simulated_annealing(problem, schedule=GeomDecay(), max_attempts=10, max_iters=np.inf, + init_state=None, curve=False, random_state=None, state_fitness_callback=None, callback_user_info=None): """Use simulated annealing to find the optimum for a given optimization problem. @@ -37,11 +36,6 @@ def simulated_annealing(problem, schedule=GeomDecay(), max_attempts=10, If :code:`False`, then no curve is stored. If :code:`True`, then a history of fitness values is provided as a third return value. - fevals: bool, default: False - Boolean to track the number of fitness function evaluations. - If :code:`False`, then nothing additional is returned. - If :code:`True`, then a history of function evaluations per iteration - is provided as a fourth return value. random_state: int, default: None If random_state is a positive integer, random_state is the seed used by np.random.seed(); otherwise, the random seed is not set. @@ -66,12 +60,10 @@ def simulated_annealing(problem, schedule=GeomDecay(), max_attempts=10, Approach*, 3rd edition. Prentice Hall, New Jersey, USA. """ - if (not isinstance(max_attempts, int) and not max_attempts.is_integer()) \ - or (max_attempts < 0): + if (not isinstance(max_attempts, int) and not max_attempts.is_integer()) or max_attempts < 0: raise Exception("""max_attempts must be a positive integer.""") - if (not isinstance(max_iters, int) and max_iters != np.inf - and not max_iters.is_integer()) or (max_iters < 0): + if (not isinstance(max_iters, int) and max_iters != np.inf and not max_iters.is_integer()) or max_iters < 0: raise Exception("""max_iters must be a positive integer.""") if init_state is not None and len(init_state) != problem.get_length(): diff --git a/mlrose_hiive/fitness/continuous_peaks.py b/mlrose_hiive/fitness/continuous_peaks.py index 31cd9ad2..12e3ae64 100644 --- a/mlrose_hiive/fitness/continuous_peaks.py +++ b/mlrose_hiive/fitness/continuous_peaks.py @@ -76,7 +76,7 @@ def evaluate(self, state): max_1 = self.max_run(1, state) # Calculate R(X, T) - if (max_0 > _t and max_1 > _t): + if max_0 > _t and max_1 > _t: _r = _n else: _r = 0 diff --git a/mlrose_hiive/fitness/four_peaks.py b/mlrose_hiive/fitness/four_peaks.py index e8a919be..96a4e932 100644 --- a/mlrose_hiive/fitness/four_peaks.py +++ b/mlrose_hiive/fitness/four_peaks.py @@ -83,7 +83,7 @@ def evaluate(self, state): head_1 = self.head(1, state) # Calculate R(X, T) - if (tail_0 > _t and head_1 > _t): + if tail_0 > _t and head_1 > _t: _r = _n else: _r = 0 diff --git a/mlrose_hiive/fitness/max_k_color.py b/mlrose_hiive/fitness/max_k_color.py index 4ba678f1..c413a07f 100644 --- a/mlrose_hiive/fitness/max_k_color.py +++ b/mlrose_hiive/fitness/max_k_color.py @@ -70,22 +70,18 @@ def evaluate(self, state): fitness: float Value of fitness function. """ - - fitness = 0 - - # this is the count of neigbor nodes with the same state value. - # Therefore state value represents color. - # This is NOT what the docs above say. + # This fitness score is the count of neigbor nodes with the same state value. + # Therefore, state value represents color. + # FIXME: This is NOT what the docs above say. edges = self.edges if self.graph_edges is None else self.graph_edges if self.maximize: - # Maximise the number of adjacent nodes not of the same colour. - fitness = sum(int(state[n1] != state[n2]) for (n1, n2) in edges) - else: - # Minimise the number of adjacent nodes of the same colour. - fitness = sum(int(state[n1] == state[n2]) for (n1, n2) in edges) - return fitness + # Maximize the number of adjacent nodes not of the same color. + return sum(int(state[n1] != state[n2]) for (n1, n2) in edges) + + # Minimize the number of adjacent nodes of the same color. + return sum(int(state[n1] == state[n2]) for (n1, n2) in edges) def get_prob_type(self): """ Return the problem type. diff --git a/mlrose_hiive/fitness/travelling_sales.py b/mlrose_hiive/fitness/travelling_sales.py index eec2e5e0..541b2075 100644 --- a/mlrose_hiive/fitness/travelling_sales.py +++ b/mlrose_hiive/fitness/travelling_sales.py @@ -151,7 +151,7 @@ def __calculate_fitness_by_distance(self, state): nodes = np.array([[state[i-1], state[i]] for i in range(1, ls)] + [[state[ls-1]] + [state[0]]]) nodes.sort(axis=1) - df_nodes = pd.merge(self.df_path_list, pd.DataFrame(nodes), how='inner') + df_nodes = pd.merge(self.df_path_list, pd.DataFrame(nodes)) if df_nodes.shape[0] != nodes.shape[0]: return np.inf fitness = df_nodes.iloc[:, 2].sum() diff --git a/mlrose_hiive/generators/knapsack_generator.py b/mlrose_hiive/generators/knapsack_generator.py index 4f797f5a..6b6960bf 100644 --- a/mlrose_hiive/generators/knapsack_generator.py +++ b/mlrose_hiive/generators/knapsack_generator.py @@ -17,9 +17,6 @@ def generate(seed, number_of_items_types=10, np.random.seed(seed) weights = 1 + np.random.randint(max_weight_per_item, size=number_of_items_types) values = 1 + np.random.randint(max_value_per_item, size=number_of_items_types) - problem = mlrose_hiive.KnapsackOpt(length=number_of_items_types, - maximize=True, max_val=max_item_count, - weights=weights, values=values, - max_weight_pct=max_weight_pct, - multiply_by_max_item_count=multiply_by_max_item_count) + problem = mlrose_hiive.KnapsackOpt(length=number_of_items_types, max_val=max_item_count, weights=weights, values=values, + max_weight_pct=max_weight_pct, multiply_by_max_item_count=multiply_by_max_item_count) return problem diff --git a/mlrose_hiive/generators/max_k_color_generator.py b/mlrose_hiive/generators/max_k_color_generator.py index 41c75fda..ff6111a2 100644 --- a/mlrose_hiive/generators/max_k_color_generator.py +++ b/mlrose_hiive/generators/max_k_color_generator.py @@ -15,7 +15,7 @@ def generate(seed, number_of_nodes=20, max_connections_per_node=4, max_colors=No """ np.random.seed(seed) # all nodes have to be connected, somehow. - node_connection_counts = 1 + np.random.randint(max_connections_per_node, size=number_of_nodes) + node_connection_counts = 1 + np.random.choice(max_connections_per_node, size=number_of_nodes) node_connections = {} nodes = range(number_of_nodes) diff --git a/mlrose_hiive/generators/tsp_generator.py b/mlrose_hiive/generators/tsp_generator.py index 88e66202..4e9ffdae 100644 --- a/mlrose_hiive/generators/tsp_generator.py +++ b/mlrose_hiive/generators/tsp_generator.py @@ -32,7 +32,7 @@ def generate(seed, number_of_cities, area_width=250, area_height=250): g.add_edge(a, b, length=int(round(distance))) - return TSPOpt(coords=coords, distances=distances, maximize=False, source_graph=g) + return TSPOpt(coords=coords, distances=distances, source_graph=g) @staticmethod diff --git a/mlrose_hiive/neural/_nn_core.py b/mlrose_hiive/neural/_nn_core.py index 47b6ae5a..50c43226 100644 --- a/mlrose_hiive/neural/_nn_core.py +++ b/mlrose_hiive/neural/_nn_core.py @@ -10,7 +10,7 @@ from mlrose_hiive.algorithms.decay import GeomDecay from mlrose_hiive.algorithms.rhc import random_hill_climb from mlrose_hiive.algorithms.sa import simulated_annealing -from mlrose_hiive.algorithms.ga import genetic_alg +from mlrose_hiive.algorithms.ga import genetic_alg from mlrose_hiive.neural._nn_base import _NNBase from mlrose_hiive.neural.activation import (identity, relu, sigmoid, tanh) @@ -108,17 +108,15 @@ def _validate(self): raise Exception("""Algorithm must be one of: 'random_hill_climb', 'simulated_annealing', 'genetic_alg', 'gradient_descent'.""") - - def _validate_input(self, X, y): + + def _validate_input(self, y): """ Add _classes attribute based on classes present in y. """ - - # Required for sk-learn 1.3+. Doesn't cause issues for lower versions. + # Required for scikit-learn 1.3+. Doesn't cause issues for lower versions. # Copied from https://github.com/scikit-learn/scikit-learn/blob/5c4aa5d0d90ba66247d675d4c3fc2fdfba3c39ff/sklearn/neural_network/_multilayer_perceptron.py # Note: no workaround found for multi-class labels, still doesn't work with f1 score. - - if (not hasattr(self, "classes_")): + if not hasattr(self, "classes_"): self._label_binarizer = LabelBinarizer() self._label_binarizer.fit(y) self.classes_ = self._label_binarizer.classes_ @@ -141,7 +139,7 @@ def fit(self, X, y=None, init_weights=None): If :code:`None`, then a random state is used. """ self._validate() - self._validate_input(X, y) + self._validate_input(y) X, y = self._format_x_y_data(X, y) @@ -248,31 +246,26 @@ def __run_with_rhc(self, init_weights, num_nodes, problem): fitness_curve = [] fitted_weights = [] loss = np.inf + # Can't use restart feature of random_hill_climb function, since # want to keep initial weights in the range -1 to 1. for _ in range(self.restarts + 1): restart_weights = np.random.uniform(-1, 1, num_nodes) if init_weights is None else init_weights - if self.curve: - current_weights, current_loss, fitness_curve = \ - random_hill_climb(problem, - max_attempts=self.max_attempts if - self.early_stopping else - self.max_iters, - max_iters=self.max_iters, - restarts=0, init_state=restart_weights, - curve=self.curve) - else: - current_weights, current_loss, _ = random_hill_climb( - problem, - max_attempts=self.max_attempts if self.early_stopping - else self.max_iters, - max_iters=self.max_iters, - restarts=0, init_state=restart_weights, curve=self.curve) + current_weights, current_loss, fitness_curve = random_hill_climb(problem, + max_attempts=(self.max_attempts if self.early_stopping + else self.max_iters), + max_iters=self.max_iters, + init_state=restart_weights, + curve=self.curve) + + if not self.curve: + fitness_curve = [] if current_loss < loss: fitted_weights = current_weights loss = current_loss + return fitness_curve, fitted_weights, loss def predict(self, X): diff --git a/mlrose_hiive/neural/fitness/network_weights.py b/mlrose_hiive/neural/fitness/network_weights.py index babc49e9..75cbf483 100644 --- a/mlrose_hiive/neural/fitness/network_weights.py +++ b/mlrose_hiive/neural/fitness/network_weights.py @@ -34,7 +34,7 @@ class NetworkWeights: bias: bool, default: True Whether a bias term is included in the network. - is_classifer: bool, default: True + is_classifier: bool, default: True Whether the network is for classification or regression. Set True for classification and False for regression. """ diff --git a/mlrose_hiive/neural/logistic_regression.py b/mlrose_hiive/neural/logistic_regression.py index 4ac45992..239b181e 100644 --- a/mlrose_hiive/neural/logistic_regression.py +++ b/mlrose_hiive/neural/logistic_regression.py @@ -87,11 +87,7 @@ def __init__(self, algorithm='random_hill_climb', max_iters=100, bias=True, mutation_prob=0.1, max_attempts=10, random_state=None, curve=False): - _NNCore.__init__( - self, hidden_nodes=[], activation='sigmoid', - algorithm=algorithm, max_iters=max_iters, bias=bias, - is_classifier=True, learning_rate=learning_rate, - early_stopping=early_stopping, clip_max=clip_max, - restarts=restarts, schedule=schedule, pop_size=pop_size, - mutation_prob=mutation_prob, max_attempts=max_attempts, - random_state=random_state, curve=curve) + _NNCore.__init__(self, hidden_nodes=[], activation='sigmoid', algorithm=algorithm, max_iters=max_iters, bias=bias, + learning_rate=learning_rate, early_stopping=early_stopping, clip_max=clip_max, restarts=restarts, + schedule=schedule, pop_size=pop_size, mutation_prob=mutation_prob, max_attempts=max_attempts, + random_state=random_state, curve=curve) diff --git a/mlrose_hiive/neural/neural_network.py b/mlrose_hiive/neural/neural_network.py index a8a67e07..60192a39 100644 --- a/mlrose_hiive/neural/neural_network.py +++ b/mlrose_hiive/neural/neural_network.py @@ -33,7 +33,7 @@ class NeuralNetwork(_NNCore, ClassifierMixin): bias: bool, default: True Whether to include a bias term. - is_classifer: bool, default: True + is_classifier: bool, default: True Whether the network is for classification or regression. Set :code:`True` for classification and :code:`False` for regression. diff --git a/mlrose_hiive/neural/utils/weights.py b/mlrose_hiive/neural/utils/weights.py index 9ebb51c9..4d1139bf 100644 --- a/mlrose_hiive/neural/utils/weights.py +++ b/mlrose_hiive/neural/utils/weights.py @@ -118,9 +118,7 @@ def gradient_descent_original(problem, max_attempts=10, max_iters=np.inf, else: problem.set_state(init_state) - if curve: - fitness_curve = [] - + fitness_curve = [] attempts = 0 iters = 0 diff --git a/mlrose_hiive/opt_probs/discrete_opt.py b/mlrose_hiive/opt_probs/discrete_opt.py index 3a466efd..58da5f80 100644 --- a/mlrose_hiive/opt_probs/discrete_opt.py +++ b/mlrose_hiive/opt_probs/discrete_opt.py @@ -57,9 +57,9 @@ def __init__(self, length, fitness_fn, maximize=True, max_val=2, else: self.max_val = max_val - self.keep_sample = [] + self.keep_sample = np.array([]) self.node_probs = np.zeros([self.length, self.max_val, self.max_val]) - self.parent_nodes = [] + self.parent_nodes = np.array([]) self.sample_order = [] self.prob_type = 'discrete' self.noise = 0 @@ -98,8 +98,7 @@ def eval_node_probs(self): for i in range(1, self.length): for j in range(self.max_val): - subset = self.keep_sample[np.where( - self.keep_sample[:, parent[i - 1]] == j)[0]] + subset = self.keep_sample[np.where(self.keep_sample[:, parent[i - 1]] == j)[0]] if not len(subset): probs[i, j] = 1 / self.max_val @@ -145,9 +144,7 @@ def _get_mutual_info_slow(self): mutual_info = np.zeros([self.length, self.length]) for i in range(self.length - 1): for j in range(i + 1, self.length): - mutual_info[i, j] = -1 * mutual_info_score( - self.keep_sample[:, i], - self.keep_sample[:, j]) + mutual_info[i, j] = -1 * mutual_info_score(self.keep_sample[:, i], self.keep_sample[:, j]) return mutual_info # adapted from https://github.com/parkds/mlrose/blob/f7154a1d3e3fdcd934bb3c683b943264d2870fd1/mlrose/algorithms.py @@ -241,7 +238,7 @@ def find_sample_order(self): """ sample_order = [] last = [0] - parent = np.array(self.parent_nodes) + parent = self.parent_nodes while len(sample_order) < self.length: inds = [] diff --git a/mlrose_hiive/opt_probs/flip_flop_opt.py b/mlrose_hiive/opt_probs/flip_flop_opt.py index 20ed6814..6de8d80c 100644 --- a/mlrose_hiive/opt_probs/flip_flop_opt.py +++ b/mlrose_hiive/opt_probs/flip_flop_opt.py @@ -29,7 +29,7 @@ def __init__(self, length=None, fitness_fn=None, maximize=True, self.max_val = 2 crossover = OnePointCrossOver(self) if crossover is None else crossover mutator = ChangeOneMutator(self) if mutator is None else mutator - super().__init__(length, fitness_fn, maximize, 2, crossover, mutator) + super().__init__(length, fitness_fn, maximize, crossover=crossover, mutator=mutator) state = np.random.randint(2, size=self.length) self.set_state(state) diff --git a/mlrose_hiive/opt_probs/tsp_opt.py b/mlrose_hiive/opt_probs/tsp_opt.py index bdb4e321..97acb1e4 100644 --- a/mlrose_hiive/opt_probs/tsp_opt.py +++ b/mlrose_hiive/opt_probs/tsp_opt.py @@ -121,7 +121,6 @@ def random_mimic(self): """ remaining = list(np.arange(self.length)) state = np.zeros(self.length, dtype=np.int8) - sample_order = self.sample_order[1:] node_probs = np.copy(self.node_probs) # Get value of first element in new sample diff --git a/mlrose_hiive/runners/_nn_runner_base.py b/mlrose_hiive/runners/_nn_runner_base.py index d6dbf116..c1f17a6d 100644 --- a/mlrose_hiive/runners/_nn_runner_base.py +++ b/mlrose_hiive/runners/_nn_runner_base.py @@ -105,16 +105,6 @@ def run(self): finally: self._tear_down() - """ - best = { - 'best_params': sr.best_params_, - 'best_score': sr.best_score_, - 'best_estimator': sr.best_estimator_, - 'best_loss': sr.best_estimator_.best_loss_, - 'best_fitted_weights': sr.best_estimator_.fitted_weights # ndarray - } - """ - def _get_pickle_filename_root(self, name): filename_root = super()._get_pickle_filename_root(name) arg_text = ''.join([f'{k}_{self._sanitize_value(v)}_' diff --git a/mlrose_hiive/runners/nngs_runner.py b/mlrose_hiive/runners/nngs_runner.py index 2612f751..0f7c8b6b 100644 --- a/mlrose_hiive/runners/nngs_runner.py +++ b/mlrose_hiive/runners/nngs_runner.py @@ -1,9 +1,8 @@ import sklearn.metrics as skmt -from mlrose_hiive import NNClassifier, relu -from mlrose_hiive.decorators import short_name +from mlrose_hiive import NNClassifier from mlrose_hiive.runners._nn_runner_base import _NNRunnerBase -from mlrose_hiive.decorators import get_short_name +from mlrose_hiive.decorators import short_name, get_short_name """ Example usage: @@ -40,9 +39,8 @@ class NNGSRunner(_NNRunnerBase): def __init__(self, x_train, y_train, x_test, y_test, experiment_name, seed, iteration_list, algorithm, grid_search_parameters, grid_search_scorer_method=skmt.balanced_accuracy_score, - bias=True, early_stopping=True, clip_max=1e+10, activation=None, - max_attempts=500, n_jobs=1, cv=5, generate_curves=True, output_directory=None, - **kwargs): + bias=True, early_stopping=True, clip_max=1e+10, max_attempts=500, n_jobs=1, cv=5, + generate_curves=True, output_directory=None, **kwargs): # update short name based on algorithm self._set_dynamic_runner_name(f'{get_short_name(self)}_{get_short_name(algorithm)}') diff --git a/mlrose_hiive/samples/synthetic_data.py b/mlrose_hiive/samples/synthetic_data.py index 93b55734..5ba1a72d 100644 --- a/mlrose_hiive/samples/synthetic_data.py +++ b/mlrose_hiive/samples/synthetic_data.py @@ -38,7 +38,7 @@ def get_synthetic_data(self, x_dim=20, y_dim=20, add_noise=0.0, add_redundant_co output = self.root_directory + f'/synthetic__sz_{x_dim*y_dim}__n_{1 if add_noise else 0}__rc_{add_redundant_column}/'.lower().replace('.', '_') try: makedirs(output) - except OSError as e: + except OSError as _: pass features, classes = self.get_synthetic_features_and_classes(add_redundant_column) return sd2, features, classes, output