diff --git a/doc/math.qbk b/doc/math.qbk index 09057386d9..7c8f7804ef 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -728,6 +728,7 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22. [include optimization/differential_evolution.qbk] [include optimization/jso.qbk] [include optimization/random_search.qbk] +[include optimization/cma_es.qbk] [endmathpart] [/mathpart optimization Optimization] [mathpart poly Polynomials and Rational Functions] diff --git a/doc/optimization/cma_es.qbk b/doc/optimization/cma_es.qbk new file mode 100644 index 0000000000..4d17f7314c --- /dev/null +++ b/doc/optimization/cma_es.qbk @@ -0,0 +1,83 @@ +[/ +Copyright (c) 2024 Nick Thompson +Use, modification and distribution are subject to the +Boost Software License, Version 1.0. (See accompanying file +LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:cma_es Evolution Strategy with Covariance Matrix Adaptation] + +[heading Synopsis] + +`` + #include + + namespace boost::math::optimization { + + template struct cma_es_parameters { + using Real = typename ArgumentContainer::value_type; + ArgumentContainer lower_bounds; + ArgumentContainer upper_bounds; + Real std_dev = std::numeric_limits::quiet_NaN(); + Real initial_population_size = 0; + ArgumentContainer const * initial_guess = nullptr; + }; + + template + ArgumentContainer cma_es( + const Func cost_function, + random_search_parameters const ¶ms, + URBG &gen, + std::invoke_result_t target_value = std::numeric_limits>::quiet_NaN(), + std::atomic *cancellation = nullptr, + std::atomic> *current_minimum_cost = nullptr, + std::vector>> *queries = nullptr); + + } // namespaces +`` + +The `cma_es` function searches for a global minimum of a function. + + +[heading Parameters] + + `lower_bounds`: A container representing the lower bounds of the optimization space along each dimension. The `.size()` of the bounds should return the dimension of the problem. + `upper_bounds`: A container representing the upper bounds of the optimization space along each dimension. It should have the same size of `lower_bounds`, and each element should be >= the corresponding element of `lower_bounds`. + `initial_guess`: An optional guess for where we should start looking for solutions. This is provided for consistency with other optimization functions-it's not particularly useful for this function. + +[heading The function] + +`` +template +ArgumentContainer cma_es(const Func cost_function, + random_search_parameters const ¶ms, + URBG &gen, + std::invoke_result_t value_to_reach = std::numeric_limits>::quiet_NaN(), + std::atomic *cancellation = nullptr, + std::atomic> *current_minimum_cost = nullptr, + std::vector>> *queries = nullptr) +`` + +Parameters: + + `cost_function`: The cost function to be minimized. + `params`: The parameters to the algorithm as described above. + `gen`: A uniform random bit generator, like `std::mt19937_64`. + `value_to_reach`: An optional value that, if reached, stops the optimization. This is the most robust way to terminate the calculation, but in most cases the optimal value of the cost function is unknown. If it is, use it! Physical considerations can often be used to find optimal values for cost functions. + `cancellation`: An optional atomic boolean to allow the user to stop the computation and gracefully return the best result found up to that point. N.B.: Cancellation is not immediate; the in-progress generation finishes. + `current_minimum_cost`: An optional atomic variable to store the current minimum cost during optimization. This allows developers to (e.g.) plot the progress of the minimization over time and in conjunction with the `cancellation` argument allow halting the computation when the progress stagnates. + `queries`: An optional vector to store intermediate results during optimization. This is useful for debugging and perhaps volume rendering of the objective function after the calculation is complete. + +Returns: + +The argument vector corresponding to the minimum cost found by the optimization. + +[h4:examples Examples] + +An example exhibiting graceful cancellation and progress observability can be studied in [@../../example/cma_es_example.cpp cma_es_example.cpp]. + +[h4:references References] + +http://www.cmap.polytechnique.fr/~nikolaus.hansen/gecco2013-CMA-ES-tutorial.pdf + +[endsect] [/section:cma_es] diff --git a/include/boost/math/optimization/cma_es.hpp b/include/boost/math/optimization/cma_es.hpp new file mode 100644 index 0000000000..4ec1a0bbe8 --- /dev/null +++ b/include/boost/math/optimization/cma_es.hpp @@ -0,0 +1,161 @@ +/* + * Copyright Nick Thompson, 2024 + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ +#ifndef BOOST_MATH_OPTIMIZATION_CMA_ES_HPP +#define BOOST_MATH_OPTIMIZATION_CMA_ES_HPP +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __has_include() +#include +#include +#else +#error "CMA-ES requires Eigen." +#endif + +// Follows the notation in: +// http://www.cmap.polytechnique.fr/~nikolaus.hansen/gecco2013-CMA-ES-tutorial.pdf + +namespace boost::math::optimization { + +template struct cma_es_parameters { + using Real = typename ArgumentContainer::value_type; + ArgumentContainer lower_bounds; + ArgumentContainer upper_bounds; + size_t max_function_calls = 10000*std::thread::hardware_concurrency(); + ArgumentContainer const *initial_guess = nullptr; + Real step_length = std::numeric_limits::quiet_NaN(); + size_t initial_population_size = 0; + unsigned threads = std::thread::hardware_concurrency(); +}; + +template +void validate_cma_es_parameters(cma_es_parameters const ¶ms) { + using std::isfinite; + using std::isnan; + std::ostringstream oss; + detail::validate_bounds(params.lower_bounds, params.upper_bounds); + if (params.initial_guess) { + detail::validate_initial_guess(*params.initial_guess, params.lower_bounds, params.upper_bounds); + } + if (params.threads == 0) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": There must be at least one thread."; + throw std::invalid_argument(oss.str()); + } +} + +template +ArgumentContainer cma_es( + const Func cost_function, + cma_es_parameters const ¶ms, + URBG &gen, + std::invoke_result_t target_value = std::numeric_limits>::quiet_NaN(), + std::atomic *cancellation = nullptr, + std::atomic> *current_minimum_cost = nullptr, + std::vector>> *queries = nullptr) + { + using Real = typename ArgumentContainer::value_type; + using ResultType = std::invoke_result_t; + using std::isnan; + using std::uniform_real_distribution; + validate_cma_es_parameters(params); + const size_t dimension = params.lower_bounds.size(); + std::atomic target_attained = false; + std::atomic lowest_cost = std::numeric_limits::infinity(); + ArgumentContainer best_vector; + if constexpr (detail::has_resize_v) { + best_vector.resize(dimension, std::numeric_limits::quiet_NaN()); + } + // http://www.cmap.polytechnique.fr/~nikolaus.hansen/gecco2013-CMA-ES-tutorial.pdf, slide 16: + Real c_c = Real(4)/dimension; + Real c_sigma = Real(4)/dimension; + Real c_1 = Real(2)/(dimension*dimension); + std::vector weights(params.initial_population_size, std::numeric_limits::quiet_NaN()); + // mu:= number of parents, lambda := number of offspring. + auto C = Eigen::Matrix::Identity(dimension, dimension); + if (params.initial_guess) { + auto initial_cost = cost_function(*params.initial_guess); + if (!isnan(initial_cost)) { + best_vector = *params.initial_guess; + if (current_minimum_cost) { + *current_minimum_cost = initial_cost; + } + } + } + std::mutex mt; + std::vector thread_pool; + std::atomic function_calls = 0; + for (unsigned j = 0; j < params.threads; ++j) { + auto seed = gen(); + thread_pool.emplace_back([&, seed]() { + URBG g(seed); + ArgumentContainer trial_vector; + // This vector is empty unless the user requests the queries be stored: + std::vector>> local_queries; + if constexpr (detail::has_resize_v) { + trial_vector.resize(dimension, std::numeric_limits::quiet_NaN()); + } + while (function_calls < params.max_function_calls) { + if (cancellation && *cancellation) { + break; + } + if (target_attained) { + break; + } + // Fill trial vector: + uniform_real_distribution unif01(Real(0), Real(1)); + for (size_t i = 0; i < dimension; ++i) { + trial_vector[i] = params.lower_bounds[i] + (params.upper_bounds[i] - params.lower_bounds[i])*unif01(g); + } + ResultType trial_cost = cost_function(trial_vector); + ++function_calls; + if (isnan(trial_cost)) { + continue; + } + if (trial_cost < lowest_cost) { + lowest_cost = trial_cost; + if (current_minimum_cost) { + *current_minimum_cost = trial_cost; + } + // We expect to need to acquire this lock with decreasing frequency + // as the computation proceeds: + std::scoped_lock lock(mt); + best_vector = trial_vector; + } + if (queries) { + local_queries.push_back(std::make_pair(trial_vector, trial_cost)); + } + if (!isnan(target_value) && trial_cost <= target_value) { + target_attained = true; + } + } + if (queries) { + std::scoped_lock lock(mt); + queries->insert(queries->begin(), local_queries.begin(), local_queries.end()); + } + }); + } + for (auto &thread : thread_pool) { + thread.join(); + } + return best_vector; +} + +} // namespace boost::math::optimization +#endif diff --git a/include/boost/math/optimization/detail/multivariate_normal_distribution.hpp b/include/boost/math/optimization/detail/multivariate_normal_distribution.hpp new file mode 100644 index 0000000000..a54d614996 --- /dev/null +++ b/include/boost/math/optimization/detail/multivariate_normal_distribution.hpp @@ -0,0 +1,96 @@ +/* + * Copyright Nick Thompson, 2024 + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ +#ifndef BOOST_MATH_OPTIMIZATION_DETAIL_MULTIVARIATE_NORMAL_HPP +#define BOOST_MATH_OPTIMIZATION_DETAIL_MULTIVARIATE_NORMAL_HPP +#include +#include +#include +#include +#include +#include +#include +#if __has_include() +#include +#else +#error "The Eigen library is required for the successful operation of this class" +#endif + +namespace boost::math::optimization::detail { + +// This is super useful functionality, but nonetheless it must be shunted off into a dark corner of the library +// because even today there is no standard matrix class and no standard was to do a Cholesky decomposition. +// Hence a more public place in the library just puts users in dependency hell. +template +class multivariate_normal_distribution { +public: + using Real = typename RandomAccessContainer::value_type; + multivariate_normal_distribution(RandomAccessContainer const & mean, Eigen::Matrix const & covariance_matrix) : m_{mean} { + if (covariance_matrix.rows() != covariance_matrix.cols()) { + std::ostringstream oss; + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": The covariance matrix must be square, but received a (" << covariance_matrix.rows() << ", " << covariance_matrix.cols() << ") matrix."; + throw std::domain_error(oss.str()); + } + if (mean.size() != covariance_matrix.cols()) { + std::ostringstream oss; + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": The mean has dimension " << mean.size() << " but the covariance matrix has " << covariance_matrix.cols() << " columns."; + throw std::domain_error(oss.str()); + } + Eigen::LLT > llt(covariance_matrix); + if(llt.info() == Eigen::NumericalIssue) { + std::ostringstream oss; + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": The covariance matrix is not positive definite. We probably need to use Eigen::LDLT instead."; + throw std::domain_error(oss.str()); + } + L_ = llt.matrixL(); + } + + template + RandomAccessContainer operator()(URNG& g) const { + RandomAccessContainer x; + if constexpr (detail::has_resize_v) { + x.resize(m_.size()); + } + (*this)(x, g); + return x; + } + + template + void operator()(RandomAccessContainer& x, URNG& g) const { + using std::normal_distribution; + if (x.size() != m_.size()) { + std::ostringstream oss; + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": Must provide a vector of the same length as the mean."; + throw std::domain_error(oss.str()); + } + + auto dis = normal_distribution(0, 1); + /// First generate standard normal random vector: + Eigen::Vector u; + u.resize(x.size()); + for (size_t i = 0; i < x.size(); ++i) { + u[i] = dis(g); + } + // Transform it with L: + u = L_*u; + for (size_t i = 0; i < x.size(); ++i) { + x[i] = u[i] + m_[i]; + } + } + + + +private: + RandomAccessContainer m_; + Eigen::Matrix L_; +}; + +} // namespace boost::math::optimization::detail +#endif diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 532b875212..1293a23905 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -1258,6 +1258,8 @@ test-suite interpolators : [ run differential_evolution_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run jso_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] [ run random_search_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] + [ run cma_es_test.cpp : : : [ requires cxx17_if_constexpr cxx17_std_apply ] ] + ; test-suite quadrature : diff --git a/test/cma_es_test.cpp b/test/cma_es_test.cpp new file mode 100644 index 0000000000..b46ccb1f8e --- /dev/null +++ b/test/cma_es_test.cpp @@ -0,0 +1,195 @@ +/* + * Copyright Nick Thompson, 2024 + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#include "math_unit_test.hpp" +#include "test_functions_for_optimization.hpp" +#include +#include +#include +#include +#include +#include + +using std::abs; +using boost::math::optimization::cma_es; +using boost::math::optimization::cma_es_parameters; +using boost::math::optimization::detail::multivariate_normal_distribution; + +template void test_multivariate_normal() { + + using Eigen::Matrix; + using Eigen::Dynamic; + constexpr const size_t n = 7; + Matrix C = Matrix::Identity(n, n); + std::array mean; + mean.fill(0); + auto mvn = multivariate_normal_distribution(mean, C); + std::array x; + std::array empirical_means; + empirical_means.fill(0); + std::mt19937_64 mt(12345); + + size_t i = 0; + size_t samples = 1024; + do { + x = mvn(mt); + for (size_t j = 0; j < n; ++j) { + empirical_means[j] += x[j]; + } + } while(i++ < samples); + + for (size_t j = 0; j < n; ++j) { + empirical_means[j] /= samples; + CHECK_LE(abs(empirical_means[j]), 0.05); + } + + // Exhibits why we might need to use the LDL^T decomposition instead: + //C(0,0) = 0; + //mvn = multivariate_normal_distribution(mean, C); +} + + +template void test_ackley() { + std::cout << "Testing CMA-ES on Ackley function . . .\n"; + using ArgType = std::array; + auto params = cma_es_parameters(); + params.lower_bounds = {-5, -5}; + params.upper_bounds = {5, 5}; + + std::mt19937_64 gen(12345); + auto local_minima = cma_es(ackley, params, gen); + CHECK_LE(std::abs(local_minima[0]), Real(0.1)); + CHECK_LE(std::abs(local_minima[1]), Real(0.1)); + + // Does it work with a lambda? + auto ack = [](std::array const &x) { return ackley(x); }; + local_minima = cma_es(ack, params, gen); + CHECK_LE(std::abs(local_minima[0]), Real(0.1)); + CHECK_LE(std::abs(local_minima[1]), Real(0.1)); + + // Test that if an intial guess is the exact solution, the returned solution is the exact solution: + std::array initial_guess{0, 0}; + params.initial_guess = &initial_guess; + local_minima = cma_es(ack, params, gen); + CHECK_EQUAL(local_minima[0], Real(0)); + CHECK_EQUAL(local_minima[1], Real(0)); + + std::atomic cancel = false; + Real target_value = 0.0; + std::atomic current_minimum_cost = std::numeric_limits::quiet_NaN(); + // Test query storage: + std::vector> queries; + local_minima = cma_es(ack, params, gen, target_value, &cancel, ¤t_minimum_cost, &queries); + CHECK_EQUAL(local_minima[0], Real(0)); + CHECK_EQUAL(local_minima[1], Real(0)); + CHECK_LE(size_t(1), queries.size()); + for (auto const & q : queries) { + auto expected = ackley(q.first); + CHECK_EQUAL(expected, q.second); + } +} + + +template void test_rosenbrock_saddle() { + std::cout << "Testing CMA-ES on Rosenbrock saddle . . .\n"; + using ArgType = std::array; + auto params = cma_es_parameters(); + params.lower_bounds = {0.5, 0.5}; + params.upper_bounds = {2.048, 2.048}; + params.max_function_calls = 20000; + std::mt19937_64 gen(234568); + auto local_minima = cma_es(rosenbrock_saddle, params, gen); + + CHECK_ABSOLUTE_ERROR(Real(1), local_minima[0], Real(0.05)); + CHECK_ABSOLUTE_ERROR(Real(1), local_minima[1], Real(0.05)); + + // Does cancellation work? + std::atomic cancel = true; + gen.seed(12345); + local_minima = + cma_es(rosenbrock_saddle, params, gen, std::numeric_limits::quiet_NaN(), &cancel); + CHECK_GE(std::abs(local_minima[0] - Real(1)), std::sqrt(std::numeric_limits::epsilon())); +} + + +template void test_rastrigin() { + std::cout << "Testing CMA-ES on Rastrigin function (global minimum = (0,0,...,0))\n"; + using ArgType = std::vector; + auto params = cma_es_parameters(); + params.lower_bounds.resize(3, static_cast(-5.12)); + params.upper_bounds.resize(3, static_cast(5.12)); + params.max_function_calls = 1000000; + std::mt19937_64 gen(34567); + + // By definition, the value of the function which a target value is provided must be <= target_value. + Real target_value = 2.0; + auto local_minima = cma_es(rastrigin, params, gen, target_value); + CHECK_LE(rastrigin(local_minima), target_value); +} + + +// Tests NaN return types and return type != input type: +void test_sphere() { + std::cout << "Testing CMA-ES on sphere . . .\n"; + using ArgType = std::vector; + auto params = cma_es_parameters(); + params.lower_bounds.resize(4, -1); + params.upper_bounds.resize(4, 1); + params.max_function_calls = 100000; + std::mt19937_64 gen(56789); + auto local_minima = cma_es(sphere, params, gen); + for (auto x : local_minima) { + CHECK_ABSOLUTE_ERROR(0.0f, x, 0.5f); + } +} + + +template +void test_three_hump_camel() { + std::cout << "Testing CMA-ES on three hump camel . . .\n"; + using ArgType = std::array; + auto params = cma_es_parameters(); + params.lower_bounds[0] = -5.0; + params.lower_bounds[1] = -5.0; + params.upper_bounds[0] = 5.0; + params.upper_bounds[1] = 5.0; + std::mt19937_64 gen(56789); + auto local_minima = cma_es(three_hump_camel, params, gen); + for (auto x : local_minima) { + CHECK_ABSOLUTE_ERROR(0.0f, x, 0.2f); + } +} + + +template +void test_beale() { + std::cout << "Testing CMA-ES on the Beale function . . .\n"; + using ArgType = std::array; + auto params = cma_es_parameters(); + params.lower_bounds[0] = -5.0; + params.lower_bounds[1] = -5.0; + params.upper_bounds[0]= 5.0; + params.upper_bounds[1]= 5.0; + std::mt19937_64 gen(56789); + auto local_minima = cma_es(beale, params, gen); + CHECK_ABSOLUTE_ERROR(Real(3), local_minima[0], Real(0.1)); + CHECK_ABSOLUTE_ERROR(Real(1)/Real(2), local_minima[1], Real(0.1)); +} + +int main() { +#if 0 && (defined(__clang__) || defined(_MSC_VER)) + test_ackley(); + test_ackley(); + test_rosenbrock_saddle(); + test_rastrigin(); + test_three_hump_camel(); + test_beale(); +#endif + //test_sphere(); + test_multivariate_normal(); + return boost::math::test::report_errors(); +}