From bd518d948a98de1964aef995657c1a901f5c29b9 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Tue, 13 Feb 2024 14:52:49 -0800 Subject: [PATCH] Use threads more effectively in differential evolution --- .../optimization/differential_evolution.hpp | 79 +++++++++++-------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/include/boost/math/optimization/differential_evolution.hpp b/include/boost/math/optimization/differential_evolution.hpp index 1c34d053d9..4a488542b8 100644 --- a/include/boost/math/optimization/differential_evolution.hpp +++ b/include/boost/math/optimization/differential_evolution.hpp @@ -141,6 +141,12 @@ ArgumentContainer differential_evolution( trial_vectors[i].resize(dimension); } } + std::vector thread_generators(threads); + for (size_t j = 0; j < threads; ++j) { + thread_generators[j].seed(gen()); + } + // std::vector isn't threadsafe! + std::vector updated_indices(NP, 0); for (size_t generation = 0; generation < de_params.max_generations; ++generation) { if (cancellation && *cancellation) { @@ -149,43 +155,11 @@ ArgumentContainer differential_evolution( if (target_attained) { break; } - - // You might be tempted to parallelize the generation of trial vectors. - // Here's the deal: Reproducibly generating random numbers is a nightmare. - // I first tried seeding thread-local random number generators with the global generator, - // but ThreadSanitizer didn't like it. I *suspect* there's a way around this, but - // even if it's formally threadsafe, there's still a lot of effort to get it computationally reproducible. - uniform_real_distribution unif01(DimensionlessReal(0), DimensionlessReal(1)); - for (size_t i = 0; i < NP; ++i) { - size_t r1, r2, r3; - do { - r1 = gen() % NP; - } while (r1 == i); - do { - r2 = gen() % NP; - } while (r2 == i || r2 == r1); - do { - r3 = gen() % NP; - } while (r3 == i || r3 == r2 || r3 == r1); - for (size_t j = 0; j < dimension; ++j) { - // See equation (4) of the reference: - auto guaranteed_changed_idx = gen() % dimension; - if (unif01(gen) < de_params.crossover_probability || j == guaranteed_changed_idx) { - auto tmp = population[r1][j] + de_params.mutation_factor * (population[r2][j] - population[r3][j]); - auto const &lb = de_params.lower_bounds[j]; - auto const &ub = de_params.upper_bounds[j]; - // Some others recommend regenerating the indices rather than clamping; - // I dunno seems like it could get stuck regenerating . . . - trial_vectors[i][j] = clamp(tmp, lb, ub); - } else { - trial_vectors[i][j] = population[i][j]; - } - } - } - thread_pool.resize(0); for (size_t j = 0; j < threads; ++j) { thread_pool.emplace_back([&, j]() { + auto& tlg = thread_generators[j]; + uniform_real_distribution unif01(DimensionlessReal(0), DimensionlessReal(1)); for (size_t i = j; i < cost.size(); i += threads) { if (target_attained) { return; @@ -193,6 +167,32 @@ ArgumentContainer differential_evolution( if (cancellation && *cancellation) { return; } + size_t r1, r2, r3; + do { + r1 = tlg() % NP; + } while (r1 == i); + do { + r2 = tlg() % NP; + } while (r2 == i || r2 == r1); + do { + r3 = tlg() % NP; + } while (r3 == i || r3 == r2 || r3 == r1); + + for (size_t k = 0; k < dimension; ++k) { + // See equation (4) of the reference: + auto guaranteed_changed_idx = tlg() % dimension; + if (unif01(tlg) < de_params.crossover_probability || k == guaranteed_changed_idx) { + auto tmp = population[r1][k] + de_params.mutation_factor * (population[r2][k] - population[r3][k]); + auto const &lb = de_params.lower_bounds[k]; + auto const &ub = de_params.upper_bounds[k]; + // Some others recommend regenerating the indices rather than clamping; + // I dunno seems like it could get stuck regenerating . . . + trial_vectors[i][k] = clamp(tmp, lb, ub); + } else { + trial_vectors[i][k] = population[i][k]; + } + } + auto const trial_cost = cost_function(trial_vectors[i]); if (isnan(trial_cost)) { continue; @@ -209,7 +209,10 @@ ArgumentContainer differential_evolution( if (current_minimum_cost && cost[i] < *current_minimum_cost) { *current_minimum_cost = cost[i]; } - population[i] = trial_vectors[i]; + // Can't do this! It's a race condition! + //population[i] = trial_vectors[i]; + // Instead mark all the indices that need to be updated: + updated_indices[i] = 1; } } }); @@ -217,6 +220,12 @@ ArgumentContainer differential_evolution( for (auto &thread : thread_pool) { thread.join(); } + for (size_t i = 0; i < NP; ++i) { + if (updated_indices[i]) { + population[i] = trial_vectors[i]; + updated_indices[i] = 0; + } + } } auto it = std::min_element(cost.begin(), cost.end());