From 43d885746185b509ae851e22e0dc489686a64a49 Mon Sep 17 00:00:00 2001 From: Miroslav Stoyanov Date: Thu, 23 Jan 2025 13:58:22 -0500 Subject: [PATCH] cleaning up docs and sharp edges --- src/asgard_discretization.cpp | 10 +- src/asgard_discretization.hpp | 3 +- src/asgard_io.cpp | 21 ++-- src/asgard_program_options.cpp | 13 +-- src/asgard_program_options.hpp | 12 ++- src/asgard_program_options_tests.cpp | 16 +-- src/asgard_solver.cpp | 33 +++--- src/asgard_solver.hpp | 148 +++++++++------------------ src/asgard_term_manager.cpp | 20 ++-- src/asgard_term_manager.hpp | 9 +- src/asgard_time_advance.cpp | 81 ++++----------- src/diffusion.cpp | 69 +++++++++---- src/pde/asgard_pde_base.hpp | 11 +- 13 files changed, 208 insertions(+), 238 deletions(-) diff --git a/src/asgard_discretization.cpp b/src/asgard_discretization.cpp index 1f5c62a82..10a777f1e 100644 --- a/src/asgard_discretization.cpp +++ b/src/asgard_discretization.cpp @@ -155,16 +155,16 @@ void discretization_manager::start_cold() degree_ = options.degree.value(); - if (not stop_verbosity()) - std::cout << "\n -- ASGarD discretization options --\n"; - if (high_verbosity()) { std::cout << "Branch: " << GIT_BRANCH << '\n'; std::cout << "Commit Summary: " << GIT_COMMIT_HASH << GIT_COMMIT_SUMMARY << '\n'; - std::cout << "This executable was built on " << BUILD_TIME << '\n'; + std::cout << "The library was built on " << BUILD_TIME << '\n'; } + if (not stop_verbosity()) + std::cout << "\n -- ASGarD discretization options --\n"; + sgrid = sparse_grid(options); if (not stop_verbosity()) { @@ -572,7 +572,7 @@ void discretization_manager::ode_sv(imex_flag imflag, kronops.make(imflag, *pde, matrices, grid); precision const tolerance = *options.isolver_tolerance; int const restart = *options.isolver_iterations; - int const max_iter = *options.isolver_outer_iterations; + int const max_iter = *options.isolver_inner_iterations; sol.resize(static_cast(x.size())); std::copy(x.begin(), x.end(), sol.begin()); if (solver == solve_opts::gmres) diff --git a/src/asgard_discretization.hpp b/src/asgard_discretization.hpp index 6095f5f9e..3b6d10459 100644 --- a/src/asgard_discretization.hpp +++ b/src/asgard_discretization.hpp @@ -175,8 +175,7 @@ class discretization_manager terms.apply_all(sgrid, conn, alpha, x, beta, y); } //! applies ADI preconditioner for all terms - void terms_apply_adi(std::vector const &x, - std::vector &y) const + void terms_apply_adi(precision const x[], precision y[]) const { tools::time_event performance_("terms_apply_adi kronmult"); terms.apply_all_adi(sgrid, conn, x, y); diff --git a/src/asgard_io.cpp b/src/asgard_io.cpp index c2be49d63..ef468db49 100644 --- a/src/asgard_io.cpp +++ b/src/asgard_io.cpp @@ -141,7 +141,7 @@ void write_output(PDE

const &pde, // std::vector> const &moments, H5Easy::dump(file, "isolver_tolerance", options.isolver_tolerance.value()); H5Easy::dump(file, "isolver_iterations", options.isolver_iterations.value()); - H5Easy::dump(file, "isolver_outer_iterations", options.isolver_outer_iterations.value()); + H5Easy::dump(file, "isolver_inner_iterations", options.isolver_inner_iterations.value()); // save some basic build info H5Easy::dump(file, "GIT_BRANCH", std::string(GIT_BRANCH)); @@ -281,7 +281,7 @@ void h5writer

::write(PDEv2

const &pde, int degree, sparse_grid const &grid H5Easy::dump(file, "solver_method", static_cast(options.solver.value_or(solve_opts::direct))); H5Easy::dump(file, "solver_itol", options.isolver_tolerance.value_or(-1)); H5Easy::dump(file, "solver_iter", options.isolver_iterations.value_or(-1)); - H5Easy::dump(file, "solver_oiter", options.isolver_outer_iterations.value_or(-1)); + H5Easy::dump(file, "solver_inner", options.isolver_inner_iterations.value_or(-1)); } H5Easy::dump(file, "timer_report", tools::timer.report()); @@ -498,12 +498,21 @@ void h5writer

::read(std::string const &filename, bool silent, PDEv2

&pde, { // solver data section if (not pde.options_.solver) pde.options_.solver = static_cast(H5Easy::load(file, "solver_method")); - if (not pde.options_.isolver_tolerance) + if (not pde.options_.isolver_tolerance) { pde.options_.isolver_tolerance = H5Easy::load(file, "solver_itol"); - if (not pde.options_.isolver_iterations) + if (pde.options_.isolver_tolerance.value() < 0) + pde.options_.isolver_tolerance = pde.options_.default_isolver_tolerance; + } + if (not pde.options_.isolver_iterations) { pde.options_.isolver_iterations = H5Easy::load(file, "solver_iter"); - if (not pde.options_.isolver_outer_iterations) - pde.options_.isolver_outer_iterations = H5Easy::load(file, "solver_oiter"); + if (pde.options_.isolver_iterations.value() < 0) + pde.options_.isolver_iterations = pde.options_.default_isolver_iterations; + } + if (not pde.options_.isolver_inner_iterations) { + pde.options_.isolver_inner_iterations = H5Easy::load(file, "solver_inner"); + if (pde.options_.isolver_inner_iterations.value() < 0) + pde.options_.isolver_inner_iterations = pde.options_.default_isolver_inner_iterations; + } } state = H5Easy::load>(file, "state"); diff --git a/src/asgard_program_options.cpp b/src/asgard_program_options.cpp index d55b74063..2ab744282 100644 --- a/src/asgard_program_options.cpp +++ b/src/asgard_program_options.cpp @@ -113,8 +113,9 @@ Options Short Value Description adi - very experimental, not very stable (yet) -isolve-tol -ist double Iterative solver tolerance, applies to GMRES and BICG. -isolve-iter -isi int Iterative solver maximum number of iterations, - for GMRES this is the number of inner iterations. --isolve-outer -iso int (GMRES only) The maximum number of outer GMRES iterations. + for GMRES this is the number of outer iterations. +-isolve-inner -isn int (GMRES only) The maximum number of inner GMRES iterations, + this is ignored by BiCGSTAB. Leaving soon: -memory int Memory limit for the GPU, applied to the earlier versions @@ -222,8 +223,8 @@ void prog_opts::process_inputs(std::vector const &argv, {"-kron-mode", optentry::kron_mode}, {"-isolve-tol", optentry::isol_tolerance}, {"-ist", optentry::isol_tolerance}, {"-isolve-iter", optentry::isol_iterations}, {"-isi", optentry::isol_iterations}, - {"-isolve-outer", optentry::isol_outer_iterations}, - {"-iso", optentry::isol_outer_iterations}, + {"-isolve-inner", optentry::isol_inner_iterations}, + {"-isn", optentry::isol_inner_iterations}, {"-restart", optentry::restart_file}, }; @@ -558,12 +559,12 @@ void prog_opts::process_inputs(std::vector const &argv, } } break; - case optentry::isol_outer_iterations: { + case optentry::isol_inner_iterations: { auto selected = move_process_next(); if (not selected) throw std::runtime_error(report_no_value()); try { - isolver_outer_iterations = std::stoi(selected->data()); + isolver_inner_iterations = std::stoi(selected->data()); } catch(std::invalid_argument &) { throw std::runtime_error(report_wrong_value()); } catch(std::out_of_range &) { diff --git a/src/asgard_program_options.hpp b/src/asgard_program_options.hpp index b72731d35..63ae7d9dc 100644 --- a/src/asgard_program_options.hpp +++ b/src/asgard_program_options.hpp @@ -69,7 +69,7 @@ enum class preconditioner_opts none = 0, //! diagonal Jacobi preconditioner jacobi, - //! using alternating direction pseudoinverse + //! using alternating direction pseudoinverse (experimental) adi }; @@ -430,7 +430,7 @@ struct prog_opts //! max number of iterations (inner iterations for gmres) std::optional isolver_iterations; //! max number of output gmres iterations - std::optional isolver_outer_iterations; + std::optional isolver_inner_iterations; //! local kron method only, mode dense or sparse (faster but memory hungry) std::optional kron_mode; @@ -626,6 +626,12 @@ struct prog_opts std::optional default_solver; //! used in place of the preconditioner type, if preconditioner is not specified std::optional default_precon; + //! used in place of the tolerance, if tolerance is not specified + std::optional default_isolver_tolerance; + //! max number of iterations (inner iterations for gmres) + std::optional default_isolver_iterations; + //! max number of outer gmres iterations + std::optional default_isolver_inner_iterations; //! returns the first available from stop-time, default-stop-time or -1 double get_stop_time() const { return stop_time.value_or(default_stop_time.value_or(-1)); } @@ -683,7 +689,7 @@ struct prog_opts kron_mode, isol_tolerance, isol_iterations, - isol_outer_iterations, + isol_inner_iterations, restart_file, set_verbosity }; diff --git a/src/asgard_program_options_tests.cpp b/src/asgard_program_options_tests.cpp index b61a3f00c..3c30bf5e3 100644 --- a/src/asgard_program_options_tests.cpp +++ b/src/asgard_program_options_tests.cpp @@ -245,15 +245,15 @@ TEST_CASE("new program options", "[single options]") REQUIRE_THROWS_WITH(prog_opts(vecstrview({"exe", "-isi", "dummy"})), "invalid value for -isi, see exe -help"); } - SECTION("-isolve_outer") + SECTION("-isolve_inner") { - prog_opts prog(vecstrview({"", "-isolve-outer", "200"})); - REQUIRE(prog.isolver_outer_iterations); - REQUIRE(prog.isolver_outer_iterations.value() == 200); - REQUIRE_THROWS_WITH(prog_opts(vecstrview({"exe", "-isolve-outer"})), - "-isolve-outer must be followed by a value, see exe -help"); - REQUIRE_THROWS_WITH(prog_opts(vecstrview({"exe", "-iso", "dummy"})), - "invalid value for -iso, see exe -help"); + prog_opts prog(vecstrview({"", "-isolve-inner", "200"})); + REQUIRE(prog.isolver_inner_iterations); + REQUIRE(prog.isolver_inner_iterations.value() == 200); + REQUIRE_THROWS_WITH(prog_opts(vecstrview({"exe", "-isolve-inner"})), + "-isolve-inner must be followed by a value, see exe -help"); + REQUIRE_THROWS_WITH(prog_opts(vecstrview({"exe", "-isn", "dummy"})), + "invalid value for -isn, see exe -help"); } SECTION("-title") { diff --git a/src/asgard_solver.cpp b/src/asgard_solver.cpp index 7d0a45069..d54049456 100644 --- a/src/asgard_solver.cpp +++ b/src/asgard_solver.cpp @@ -657,7 +657,7 @@ int bicgstab

::solve( auto dot = [&](std::vector

const &a, std::vector

const &b) -> P { P r = 0; -//ASGARD_OMP_PARFOR_SIMD_EXTRA(reduction(+:r)) +ASGARD_OMP_PARFOR_SIMD_EXTRA(reduction(+:r)) for (int64_t i = 0; i < n; i++) r += a[i] * b[i]; return r; @@ -665,7 +665,7 @@ int bicgstab

::solve( auto dot1 = [&](std::vector

const &a) -> P { P r = 0; -//ASGARD_OMP_PARFOR_SIMD_EXTRA(reduction(+:r)) +ASGARD_OMP_PARFOR_SIMD_EXTRA(reduction(+:r)) for (int64_t i = 0; i < n; i++) r += a[i] * a[i]; return r; @@ -684,7 +684,7 @@ ASGARD_OMP_PARFOR_SIMD r = rhs; int num_appy = 1; - apply_lhs(-1, x, 1, r); // r0 = b - A * x0 + apply_lhs(-1, x.data(), 1, r.data()); // r0 = b - A * x0 P rho = dot1(r); @@ -693,7 +693,7 @@ ASGARD_OMP_PARFOR_SIMD for (int i = 0; i < max_iter_; i++) { ++num_appy; - apply_lhs(1, p, 0, v); // v = A * p + apply_lhs(1, p.data(), 0, v.data()); // v = A * p P const alpha = rho / dot(rref, v); @@ -705,7 +705,7 @@ ASGARD_OMP_PARFOR_SIMD } ++num_appy; - apply_lhs(1, r, 0, t); // t = A * p + apply_lhs(1, r.data(), 0, t.data()); // t = A * p P const omega = dot(r, t) / dot1(t); @@ -732,8 +732,8 @@ ASGARD_OMP_PARFOR_SIMD template int gmres

::solve( - operatoin_apply_precon_arr

apply_precon, - operatoin_apply_lhs_arr

apply_lhs, std::vector

const &rhs, + operatoin_apply_precon

apply_precon, + operatoin_apply_lhs

apply_lhs, std::vector

const &rhs, std::vector

&x) const { int const n = static_cast(rhs.size()); @@ -936,6 +936,13 @@ ASGARD_OMP_PARFOR_SIMD } } +template +void solver_manager

::xpby(std::vector

const &x, P beta, P y[]) { +ASGARD_OMP_PARFOR_SIMD + for (size_t i = 0; i < x.size(); i++) + y[i] = x[i] + beta * y[i]; +} + template void solver_manager

::print_opts(std::ostream &os) const { @@ -946,18 +953,18 @@ void solver_manager

::print_opts(std::ostream &os) const os << " direct\n"; break; case 1: - os << " bicgstab\n"; - os << " tolerance: " << std::get>(var).tolerance() << '\n'; - os << " max iterations: " << std::get>(var).max_iter() << '\n'; - has_precon = true; - break; - case 2: os << " gmres\n"; os << " tolerance: " << std::get>(var).tolerance() << '\n'; os << " max inner: " << std::get>(var).max_inner() << '\n'; os << " max outer: " << std::get>(var).max_outer() << '\n'; has_precon = true; break; + case 2: + os << " bicgstab\n"; + os << " tolerance: " << std::get>(var).tolerance() << '\n'; + os << " max iterations: " << std::get>(var).max_iter() << '\n'; + has_precon = true; + break; default: break; } diff --git a/src/asgard_solver.hpp b/src/asgard_solver.hpp index 19b768ba1..649987eb9 100644 --- a/src/asgard_solver.hpp +++ b/src/asgard_solver.hpp @@ -115,21 +115,6 @@ class poisson_data std::vector

diag, subdiag, rhs; }; -/*! - * \internal - * \brief Stores the data for a diagonal Jacobi preconditioner - * - * \endinternal - */ -template -struct prec_diagonal_jacobi -{ - //! make a default, no-preconditioner - prec_diagonal_jacobi() = default; - //! holds the inverse of the diagonal entries - std::vector

prec; -}; - /*! * \internal * \brief Direct solver, explicitly forms the dense matrix, very expensive @@ -164,18 +149,6 @@ class direct dense_matrix

mat; }; -/*! - * \internal - * \brief Signature for the left-hand linear operation for a solver - * - * Computes `y = alpha * A * x + beta * y` - * - * \endinternal - */ -template -using operatoin_apply_lhs = - std::function const &x, P beta, std::vector

&y)>; - /*! * \internal * \brief Signature for the left-hand linear operation for a solver, raw-array variant @@ -185,21 +158,9 @@ using operatoin_apply_lhs = * \endinternal */ template -using operatoin_apply_lhs_arr = +using operatoin_apply_lhs = std::function; -/*! - * \internal - * \brief Signature for the preconditioner - * - * Computes `y = inverse-P * x` and do not need the constants - * - * \endinternal - */ -template -using operatoin_apply_precon = - std::function const &x, std::vector

&y)>; - /*! * \internal * \brief Signature for the preconditioner @@ -210,7 +171,7 @@ using operatoin_apply_precon = * \endinternal */ template -using operatoin_apply_precon_arr = std::function; +using operatoin_apply_precon = std::function; /*! * \internal @@ -281,8 +242,8 @@ class gmres } //! solve for the given linear operators, right-hand-side and initial iterate - int solve(operatoin_apply_precon_arr

apply_precon, - operatoin_apply_lhs_arr

apply_lhs, std::vector

const &rhs, + int solve(operatoin_apply_precon

apply_precon, + operatoin_apply_lhs

apply_lhs, std::vector

const &rhs, std::vector

&x) const; //! returns the set tolerance @@ -327,8 +288,12 @@ struct solver_manager solver_manager() = default; //! create a new solver solver_manager(prog_opts const &options) - : opt(options.solver.value()) { + rassert(options.solver, "implicit time-stepping requires a solver, e.g., " + "see --help for list of available solvers"); + + opt = options.solver.value(); + switch (opt) { case solve_opts::direct: var = solvers::direct

(); // will be initialized later @@ -347,11 +312,11 @@ struct solver_manager "missing tolerance for the iterative solver gmres"); rassert(options.isolver_iterations, "missing number of iterations for the iterative solver gmres"); - rassert(options.isolver_outer_iterations, + rassert(options.isolver_inner_iterations, "missing number of outer iterations for the iterative solver gmres"); var = solvers::gmres

(options.isolver_tolerance.value(), - options.isolver_iterations.value(), - options.isolver_outer_iterations.value()); + options.isolver_inner_iterations.value(), + options.isolver_iterations.value()); precon = options.precon.value_or(preconditioner_opts::none); break; default: // unreachable @@ -365,7 +330,7 @@ struct solver_manager std::get>(var)(x); } - //! iterate_solve without a preconditioner + //! iterative solver, calls the appropriate iterative solver void iterate_solve(solvers::operatoin_apply_lhs

apply_lhs, std::vector

const &rhs, std::vector

&x) const { @@ -377,55 +342,39 @@ struct solver_manager solvers::operatoin_apply_lhs

apply_lhs, std::vector

const &rhs, std::vector

&x) const { - expect(opt == solve_opts::bicgstab); - if (precon) { - // in the bicgstab, explicitly apply the preconditioner - solvers::bicgstab

const &bicg = std::get>(var); - bicg.prec_rhs.resize(rhs.size()); - bicg.prec_y.resize(rhs.size()); - bicg.prec_yb.resize(rhs.size()); - - num_apply += 1; - precon(rhs, bicg.prec_rhs); - num_apply += bicg.solve( - [&](P alpha, std::vector

const &x, P beta, std::vector

&y) -> void - { - if (beta != 0) - bicg.prec_yb = y; - - apply_lhs(alpha, x, 0, bicg.prec_y); - precon(bicg.prec_y, y); - - if (beta != 0) - for (size_t i = 0; i < y.size(); i++) - y[i] += beta * bicg.prec_yb[i]; - }, - bicg.prec_rhs, x); - } else { - num_apply += std::get>(var).solve(apply_lhs, rhs, x); - } - } - - //! iterative solver, calls the appropriate iterative solver - void iterate_solve(solvers::operatoin_apply_lhs_arr

apply_lhs, - std::vector

const &rhs, std::vector

&x) const - { - iterate_solve(nullptr, apply_lhs, rhs, x); - } - - //! iterative solver, calls the appropriate iterative solver - void iterate_solve(solvers::operatoin_apply_precon_arr

precon, - solvers::operatoin_apply_lhs_arr

apply_lhs, - std::vector

const &rhs, std::vector

&x) const - { - expect(opt == solve_opts::gmres); - if (precon) { - solvers::gmres

const &gmres = std::get>(var); - - num_apply += gmres.solve(precon, apply_lhs, rhs, x); - } else { - num_apply += std::get>(var).solve( - [](P *)->void{ /* no preconditioner */ }, apply_lhs, rhs, x); + expect(opt != solve_opts::direct); + if (opt == solve_opts::bicgstab) { + if (precon) { + solvers::bicgstab

const &bicg = std::get>(var); + + bicg.prec_y.resize(rhs.size()); + + bicg.prec_rhs = rhs; + precon(bicg.prec_rhs.data()); + + num_apply += bicg.solve([&](P alpha, P const xx[], P beta, P y[]) + -> void { + if (beta == 0) { + apply_lhs(alpha, xx, 0, y); + precon(y); + } else { + apply_lhs(alpha, xx, 0, bicg.prec_y.data()); + precon(bicg.prec_y.data()); + xpby(bicg.prec_y, beta, y); + } + }, bicg.prec_rhs, x); + } else { + num_apply += std::get>(var).solve(apply_lhs, rhs, x); + } + } else { // if (opt == solve_opts::gmres) + if (precon) { + solvers::gmres

const &gmres = std::get>(var); + + num_apply += gmres.solve(precon, apply_lhs, rhs, x); + } else { + num_apply += std::get>(var).solve( + [](P *)->void{ /* no preconditioner */ }, apply_lhs, rhs, x); + } } } @@ -446,9 +395,12 @@ struct solver_manager //! remembers the generation of the grid that was used to last set the manager int grid_gen = -1; //! holds the actual solver instance - std::variant, solvers::bicgstab

, solvers::gmres

> var; + std::variant, solvers::gmres

, solvers::bicgstab

> var; //! holds data for the jacobi preconditioner std::vector

jacobi; + + //! helper method, y = x + beta * y, compiles with OpenMP and SIMD + static void xpby(std::vector

const &x, P beta, P y[]); }; /*! diff --git a/src/asgard_term_manager.cpp b/src/asgard_term_manager.cpp index bb057ed37..4a58f57b6 100644 --- a/src/asgard_term_manager.cpp +++ b/src/asgard_term_manager.cpp @@ -123,7 +123,6 @@ void term_manager

::rebuld_term( // build the ADI preconditioner here if (precon == preconditioner_opts::adi) { - std::cout << " rebuilding ad\n"; if (is_diag) { to_euler(legendre.pdof, alpha, wraw_diag); psedoinvert(legendre.pdof, wraw_diag, raw_diag0); @@ -369,29 +368,29 @@ void term_manager

::apply_all( template void term_manager

::apply_all_adi( sparse_grid const &grid, connection_patterns const &conns, - std::vector

const &x, std::vector

&y) const + P const x[], P y[]) const { - expect(x.size() == y.size()); - expect(x.size() == kwork.w1.size()); + int64_t const n = grid.num_indexes() * fm::ipow(legendre.pdof, grid.num_dims()); + + static std::vector

t1, t2; // more additional workspaces - std::vector

t1 = x; - std::vector

t2 = x; + t1.resize(n); + t2.resize(n); + std::copy_n(x, n, t1.data()); auto it = terms.begin(); while (it < terms.end()) { if (it->num_chain == 1) { - kron_term_adi(grid, conns, *it, 1, t1, 0, t2); + kron_term_adi(grid, conns, *it, 1, t1.data(), 0, t2.data()); std::swap(t1, t2); ++it; } else { // TODO: consider whether we should do this or not it += it->num_chain; } - - // b = 1; // next iteration appends on y } - y = t1; + std::copy_n(t1.data(), n, y); } template @@ -480,7 +479,6 @@ void term_manager

::kron_diag( } } - #ifdef ASGARD_ENABLE_DOUBLE template struct term_entry; template struct term_manager; diff --git a/src/asgard_term_manager.hpp b/src/asgard_term_manager.hpp index 146b29e12..c9c56a527 100644 --- a/src/asgard_term_manager.hpp +++ b/src/asgard_term_manager.hpp @@ -92,7 +92,7 @@ struct term_manager //! y = prod(terms_adi * x), applies the ADI preconditioning to all terms void apply_all_adi(sparse_grid const &grid, connection_patterns const &conns, - std::vector

const &x, std::vector

&y) const; + P const x[], P y[]) const; //! construct term diagonal void make_jacobi(sparse_grid const &grid, connection_patterns const &conns, @@ -114,11 +114,10 @@ struct term_manager alpha, x, beta, y, kwork); } void kron_term_adi(sparse_grid const &grid, connection_patterns const &conns, - term_entry

const &tme, P alpha, std::vector

const &x, P beta, - std::vector

&y) const + term_entry

const &tme, P alpha, P const x[], P beta, + P y[]) const { - block_cpu(legendre.pdof, grid, conns, tme.perm, tme.adi, - alpha, x.data(), beta, y.data(), kwork); + block_cpu(legendre.pdof, grid, conns, tme.perm, tme.adi, alpha, x, beta, y, kwork); } template diff --git a/src/asgard_time_advance.cpp b/src/asgard_time_advance.cpp index 318225ca9..032f76e7f 100644 --- a/src/asgard_time_advance.cpp +++ b/src/asgard_time_advance.cpp @@ -171,8 +171,8 @@ imex_advance(discretization_manager

&disc, // Implicit step f_1: f_1 - dt B f_1 = f_1s solve_opts solver = options.solver.value(); P const tolerance = *options.isolver_tolerance; - int const restart = *options.isolver_iterations; - int const max_iter = *options.isolver_outer_iterations; + int const restart = *options.isolver_inner_iterations; + int const max_iter = *options.isolver_iterations; fk::vector f_1(f.size()); fk::vector f_1_output(f.size()); if (pde.do_collision_operator()) @@ -561,66 +561,25 @@ void crank_nicolson

::next_step( P const time = disc.time_params().time(); P const dt = disc.time_params().dt(); + // if the grid changed since the last time we used the solver + // update the matrices and preconditioners, update-grid checks what's needed if (solver.grid_gen != disc.get_sgrid().generation()) solver.update_grid(disc.get_sgrid(), disc.get_conn(), disc.get_terms(), 0.5 * dt); - next = current; // copy + if (solver.opt == solve_opts::direct) { + next = current; // copy - disc.terms_apply_all(-0.5 * dt, current, 1, next); - disc.add_ode_rhs_sources(time + 0.5 * dt, dt, next); + disc.terms_apply_all(-0.5 * dt, current, 1, next); + disc.add_ode_rhs_sources(time + 0.5 * dt, dt, next); - if (solver.opt == solve_opts::direct) { solver.direct_solve(next); - } else if (solver.opt == solve_opts::bicgstab) { - work = next; + } else { // iterative solver + // form the right-hand-side inside work + work = current; + disc.terms_apply_all(-0.5 * dt, current, 1, work); + disc.add_ode_rhs_sources(time + 0.5 * dt, dt, work); - int64_t const n = static_cast(work.size()); - - switch (solver.precon) { - case preconditioner_opts::none: - solver.iterate_solve( - [&](P alpha, std::vector

const &x, P beta, std::vector

&y) -> void - { - ASGARD_OMP_PARFOR_SIMD - for (int64_t i = 0; i < n; i++) - y[i] = alpha * x[i] + beta * y[i]; - disc.terms_apply_all(0.5 * alpha * dt, x, 1, y); - }, work, next); - break; - case preconditioner_opts::jacobi: - solver.iterate_solve( - [&](std::vector

const &x, std::vector

&y) -> void - { - ASGARD_OMP_PARFOR_SIMD - for (int64_t i = 0; i < n; i++) - y[i] = x[i] * solver.jacobi[i]; - }, - [&](P alpha, std::vector

const &x, P beta, std::vector

&y) -> void - { - ASGARD_OMP_PARFOR_SIMD - for (int64_t i = 0; i < n; i++) - y[i] = alpha * x[i] + beta * y[i]; - disc.terms_apply_all(0.5 * alpha * dt, x, 1, y); - }, work, next); - break; - default: - // assuming ADI - solver.iterate_solve( - [&](std::vector

const &x, std::vector

&y) -> void - { - disc.terms_apply_adi(x, y); - }, - [&](P alpha, std::vector

const &x, P beta, std::vector

&y) -> void - { - ASGARD_OMP_PARFOR_SIMD - for (int64_t i = 0; i < n; i++) - y[i] = alpha * x[i] + beta * y[i]; - disc.terms_apply_all(0.5 * alpha * dt, x, 1, y); - }, work, next); - break; - } - } else if (solver.opt == solve_opts::gmres) { - work = next; + next = current; // use the current step as the initial guess int64_t const n = static_cast(work.size()); @@ -639,6 +598,7 @@ void crank_nicolson

::next_step( solver.iterate_solve( [&](P y[]) -> void { + tools::time_event timing_("jacobi preconditioner"); ASGARD_OMP_PARFOR_SIMD for (int64_t i = 0; i < n; i++) y[i] *= solver.jacobi[i]; @@ -651,13 +611,15 @@ void crank_nicolson

::next_step( disc.terms_apply_all(0.5 * alpha * dt, x, 1, y); }, work, next); break; - default: + default: { + static std::vector

adi_work; + adi_work.resize(work.size()); // assuming ADI solver.iterate_solve( - [&](P []) -> void + [&](P y[]) -> void { - // disc.terms_apply_adi(x, y); - throw std::runtime_error("ADI not implemented for GMRES"); + disc.terms_apply_adi(y, adi_work.data()); + std::copy(adi_work.begin(), adi_work.end(), y); }, [&](P alpha, P const x[], P beta, P y[]) -> void { @@ -666,6 +628,7 @@ void crank_nicolson

::next_step( y[i] = alpha * x[i] + beta * y[i]; disc.terms_apply_all(0.5 * alpha * dt, x, 1, y); }, work, next); + } break; } } diff --git a/src/diffusion.cpp b/src/diffusion.cpp index 939f797be..a6da20cc4 100644 --- a/src/diffusion.cpp +++ b/src/diffusion.cpp @@ -81,10 +81,39 @@ asgard::PDEv2

make_diffusion_pde(int num_dims, asgard::prog_opts options) { options.default_stop_time = 3.0; // integrate until T = 3 - // using implicit Crank-Nicolson method + // using implicit Crank-Nicolson method, which requires a solver options.default_step_method = asgard::time_advance::method::cn; - options.default_solver = asgard::solve_opts::direct; // bad but OK for this example + if (options.max_level() < 5) { + // direct (dense) solver is fast for small problems and works well for prototyping + // and debugging, since it remove from the problem some additional factors, + // such as solver tolerance and number of iterations + options.default_solver = asgard::solve_opts::direct; + } else { + // when the problem size becomes significant, forming and factorizing the dense + // operator matrix becomes prohibitively expensive in flops and memory usage + // iterative solvers are needed and it is good to specify default parameters + options.default_solver = asgard::solve_opts::gmres; + } + + // only the iterative solvers will use these values + // jacobi is the fastest and (currently) most stable preconditioner + options.default_precon = asgard::preconditioner_opts::jacobi; + + // the tolerance for the iterative solver should probably be updated + // based on the time-step and the max-level + // this is a tight number of remove the solver error from consideration + options.default_isolver_tolerance = 1.E-7; + + // the number of iterations should depends on the time-step and condition + // number of the operators, should be kept high to allow for convergence + options.isolver_iterations = 1000; + + // GMRES uses a two-loop approach (restarted GMRES) + // the inner iterations explicitly form and manipulate the basis for the Krylov sub-space + // this requires lots of memory and the number here should be kept moderate + // (memory usage is dominated by isolver_inner_iterations * degrees-of-freedom) + options.isolver_inner_iterations = 50; // create a pde from the given options and domain // we can read the variables using pde.options() and pde.domain() (both return const-refs) @@ -255,7 +284,7 @@ double get_error_l2(asgard::discretization_manager

const &disc) { // when cos(t) vanishes, so does the exact solution and enorm -> 0 // for small values of enorm, the relative error is artificially magnified // switch between relative and absolute error - if (enorm < 1.E-3) + if (enorm < 1) return std::sqrt(ndiff + enorm - nself); else return std::sqrt((ndiff + enorm - nself) / enorm); @@ -404,33 +433,33 @@ void self_test() { #ifdef ASGARD_ENABLE_DOUBLE // check convergence w.r.t. level - dotest(1.E-3, 1, "-l 4 -n 20"); - dotest(1.E-4, 1, "-l 5 -n 20"); - dotest(5.E-5, 1, "-l 6 -n 20"); + dotest(1.E-3, 1, "-l 4 -n 20 -sv direct"); + dotest(1.E-4, 1, "-l 5 -n 20 -sv direct"); + dotest(5.E-5, 1, "-l 6 -n 20 -sv direct"); - dotest(1.E-3, 1, "-l 4 -n 20"); // check chaining - dotest(1.E-4, 1, "-l 5 -n 20"); - dotest(5.E-5, 1, "-l 6 -n 20"); + dotest(1.E-3, 1, "-l 4 -n 20 -sv direct"); // check chaining + dotest(1.E-4, 1, "-l 5 -n 20 -sv direct"); + dotest(5.E-5, 1, "-l 6 -n 20 -sv direct"); - dotest(1.E-1, 1, "-l 5 -d 0 -n 20"); - dotest(5.E-3, 1, "-l 5 -d 1 -n 20"); - dotest(5.E-5, 1, "-l 5 -d 2 -n 40 -dt 0.005"); // time error manifests here + dotest(1.E-1, 1, "-l 5 -d 0 -n 20 -sv direct"); + dotest(5.E-3, 1, "-l 5 -d 1 -n 20 -sv direct"); + dotest(5.E-5, 1, "-l 5 -d 2 -n 40 -dt 0.005 -sv direct"); // time error manifests here - dotest(1.00E-2, 2, "-l 6 -t 0.5 -dt 0.1"); // second order in time - dotest(2.50E-3, 2, "-l 6 -t 0.5 -dt 0.05"); - dotest(6.25E-4, 2, "-l 6 -t 0.5 -dt 0.025"); + dotest(1.00E-2, 2, "-l 6 -t 0.5 -dt 0.1 -sv direct"); // second order in time + dotest(2.50E-3, 2, "-l 6 -t 0.5 -dt 0.05 -sv direct"); + dotest(6.25E-4, 2, "-l 6 -t 0.5 -dt 0.025 -sv direct"); - dotest(1.E-2, 3, "-l 4 -t 0.5 -dt 0.1"); // direct solver multi-d + dotest(1.E-2, 3, "-l 4 -t 0.5 -dt 0.1 -sv direct"); // direct solver multi-d // in the first few steps here, the grid is very coarse // finer refinement and time-step is needed, only the final error is OK - longtest(2.E-3, 2, "-l 2 -m 8 -dt 0.01 -a 1.E-3"); // adapt - longtest(2.E-4, 2, "-l 2 -m 8 -dt 0.01 -a 1.E-4"); + longtest(2.E-3, 2, "-l 2 -m 8 -a 1.E-3"); // adapt + dotest(5.E-3, 2, "-l 8 -a 1.E-4"); #endif #ifdef ASGARD_ENABLE_FLOAT - dotest(1.E-2, 1, "-l 4 -n 10"); - dotest(1.E-2, 2, "-l 5 -t 0.5 -dt 0.1"); + dotest(1.E-2, 1, "-l 4 -n 10 -sv direct"); + dotest(1.E-2, 2, "-l 5 -t 0.5 -dt 0.1 -sv direct"); #endif } diff --git a/src/pde/asgard_pde_base.hpp b/src/pde/asgard_pde_base.hpp index ccd3d8696..55e23cb5d 100644 --- a/src/pde/asgard_pde_base.hpp +++ b/src/pde/asgard_pde_base.hpp @@ -876,8 +876,8 @@ class PDE options_.isolver_tolerance = solvers::notolerance; if (not options_.isolver_iterations) options_.isolver_iterations = solvers::novalue; - if (not options_.isolver_outer_iterations) - options_.isolver_outer_iterations = solvers::novalue; + if (not options_.isolver_inner_iterations) + options_.isolver_inner_iterations = solvers::novalue; } constexpr static int extract_dim0 = 1; @@ -2115,6 +2115,13 @@ class PDEv2 // setting up preconditioner for a possibly iterative solver if (not options_.precon and options_.default_precon) options_.precon = options_.default_precon.value(); + // setting up the solver options + if (not options_.isolver_tolerance and options_.default_isolver_tolerance) + options_.isolver_tolerance = options_.default_isolver_tolerance.value(); + if (not options_.isolver_iterations and options_.default_isolver_iterations) + options_.isolver_iterations = options_.default_isolver_iterations.value(); + if (not options_.isolver_inner_iterations and options_.default_isolver_inner_iterations) + options_.isolver_inner_iterations = options_.default_isolver_inner_iterations.value(); } // don't support l-inf norm yet