docs and timing and testing
mkstoyanov committed Jan 23, 2025
1 parent 43d8857 commit 2ea0d86
Showing 5 changed files with 50 additions and 15 deletions.
5 changes: 4 additions & 1 deletion src/asgard_block_matrix.cpp
@@ -5,7 +5,9 @@ namespace asgard
{

template<typename P>
void dense_matrix<P>::factorize() {
void dense_matrix<P>::factorize()
{
tools::time_event timing_("dense-matrix::factorize");
expect(nrows_ == ncols_);
ipiv.resize(nrows_);
int info = lib_dispatch::getrf(nrows_, ncols_, data_.data(), nrows_,
@@ -31,6 +33,7 @@ void dense_matrix<P>::factorize() {
template<typename P>
void dense_matrix<P>::solve(std::vector<P> &b) const
{
tools::time_event timing_("dense-matrix::solve");
expect(is_factorized());
int info = lib_dispatch::getrs('N', nrows_, 1, data_.data(), nrows_,
ipiv.data(), b.data(), nrows_);
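The timing calls added throughout this commit follow a scope-guard pattern: constructing a tools::time_event starts a timer named after the enclosing routine, and its destructor records the elapsed time when the scope exits. Below is a minimal sketch of that idea, assuming a simple print-on-destruction timer; the actual tools::time_event in ASGarD may aggregate results into a profiling report and expose a different interface.

#include <chrono>
#include <iostream>
#include <string>
#include <utility>

namespace tools {
// minimal RAII timer: starts on construction, reports on destruction
class time_event {
public:
  explicit time_event(std::string name)
      : name_(std::move(name)), start_(std::chrono::steady_clock::now()) {}
  ~time_event() {
    auto const stop = std::chrono::steady_clock::now();
    auto const us =
        std::chrono::duration_cast<std::chrono::microseconds>(stop - start_).count();
    std::cout << name_ << ": " << us << " us\n"; // a real version would aggregate instead
  }
private:
  std::string name_;
  std::chrono::steady_clock::time_point start_;
};
} // namespace tools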
6 changes: 6 additions & 0 deletions src/asgard_solver.cpp
@@ -511,6 +511,7 @@ template<typename P>
direct<P>::direct(sparse_grid const &grid, connection_patterns const &conn,
term_manager<P> const &terms, P alpha)
{
tools::time_event timing_("forming dense matrix");
int const num_dims = grid.num_dims();
int const num_indexes = grid.num_indexes();
int const pdof = terms.legendre.pdof;
@@ -648,6 +649,7 @@ template<typename P>
int bicgstab<P>::solve(
operatoin_apply_lhs<P> apply_lhs, std::vector<P> const &rhs, std::vector<P> &x) const
{
tools::time_event timing_("bicgstab::solve");
int64_t const n = static_cast<int64_t>(rhs.size());
if (v.size() != rhs.size()) // the other temps are initialized with a copy
v.resize(n);
@@ -736,6 +738,7 @@ int gmres<P>::solve(
operatoin_apply_lhs<P> apply_lhs, std::vector<P> const &rhs,
std::vector<P> &x) const
{
tools::time_event timing_("gmres::solve");
int const n = static_cast<int>(rhs.size());
expect(n == static_cast<int>(x.size()));

@@ -925,6 +928,7 @@ void solver_manager<P>::update_grid(
sparse_grid const &grid, connection_patterns const &conn,
term_manager<P> const &terms, P alpha)
{
tools::time_event timing_("updating solver");
if (opt == solve_opts::direct)
var = solvers::direct<P>(grid, conn, terms, alpha);

@@ -934,6 +938,8 @@ ASGARD_OMP_PARFOR_SIMD
for (size_t i = 0; i < jacobi.size(); i++)
jacobi[i] = P{1} / (P{1} + alpha * jacobi[i]);
}

grid_gen = grid.generation();
}

template<typename P>
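The new grid_gen = grid.generation() assignment in update_grid() points to a generation-based rebuild strategy: the solver remembers which grid generation it was built for and is re-formed only when the adaptive grid changes. A small sketch of that pattern, assuming a monotonically increasing generation counter (the names here are illustrative, not the actual ASGarD API):

#include <cstdint>

struct grid_stub {
  int64_t generation() const { return gen; }
  void refine() { ++gen; } // adaptivity bumps the generation counter
  int64_t gen = 0;
};

struct solver_stub {
  int64_t grid_gen = -1; // generation of the grid the solver was built for

  void update_if_stale(grid_stub const &grid) {
    if (grid_gen == grid.generation())
      return; // solver still matches the grid, nothing to rebuild
    // ... re-form the operator and re-factorize / rebuild the preconditioner ...
    grid_gen = grid.generation();
  }
};

int main() {
  grid_stub grid;
  solver_stub solver;
  solver.update_if_stale(grid); // first call: build the solver
  grid.refine();                // grid changed
  solver.update_if_stale(grid); // new generation triggers a rebuild
  return 0;
}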
14 changes: 7 additions & 7 deletions src/asgard_solver_tests.cpp
@@ -146,11 +146,11 @@ void test_kronmult(prog_opts const &opts, P const tol_factor)
b.clone_onto_device();
fk::vector<P, mem_type::owner, resource::device> b_d =
b.clone_onto_device();
int const restart = solver::novalue;
int const max_iter = solver::novalue;
int const restart = solvers::novalue;
int const max_iter = solvers::novalue;
P const tolerance = std::is_same_v<float, P> ? 1e-6 : 1e-12;
solver::simple_gmres_euler(dt, imex_flag::unspecified, operator_matrices,
x_d, b_d, restart, max_iter, tolerance);
solvers::simple_gmres_euler(dt, imex_flag::unspecified, operator_matrices,
x_d, b_d, restart, max_iter, tolerance);
return x_d.clone_onto_host();
}();

@@ -162,10 +162,10 @@ void test_kronmult(prog_opts const &opts, P const tol_factor)
b.clone_onto_device();
fk::vector<P, mem_type::owner, resource::device> b_d =
b.clone_onto_device();
int const max_iter = solver::novalue;
int const max_iter = solvers::novalue;
P const tolerance = std::is_same_v<float, P> ? 1e-6 : 1e-12;
solver::bicgstab_euler(dt, imex_flag::unspecified, operator_matrices,
x_d, b_d, max_iter, tolerance);
solvers::bicgstab_euler(dt, imex_flag::unspecified, operator_matrices,
x_d, b_d, max_iter, tolerance);
return x_d.clone_onto_host();
}();

1 change: 1 addition & 0 deletions src/asgard_term_manager.cpp
@@ -452,6 +452,7 @@ void term_manager<P>::kron_diag(
{
std::array<P const *, max_num_dimensions> amats;

#pragma omp for
for (int i = 0; i < grid.num_indexes(); i++) {
for (int d : iindexof(num_dims))
if (tme.coeffs[d].empty())
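The added #pragma omp for is a work-sharing directive: it divides the loop iterations among the threads of an already open parallel region (presumably created by the caller of kron_diag), unlike #pragma omp parallel for, which opens its own region. A self-contained illustration of the distinction, not taken from the ASGarD sources:

#include <vector>

int main() {
  std::vector<double> x(1000, 1.0);

  #pragma omp parallel // one team of threads executes this block
  {
    // work-sharing: iterations are split across the existing team;
    // without the enclosing parallel region this loop runs on a single thread
    #pragma omp for
    for (int i = 0; i < static_cast<int>(x.size()); i++)
      x[i] *= 2.0;
  }
  return 0;
}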
39 changes: 32 additions & 7 deletions src/diffusion.cpp
@@ -31,6 +31,15 @@
 * Thus, applying Dirichlet boundary conditions on the gradient operator results in
 * Dirichlet boundary conditions for the field \b f, while applying Dirichlet
 * conditions on the divergence yields Neumann conditions for the field.
*
* \par
 * The condition number of the second order operator is the square of that of the
 * first order operator, and the corresponding CFL condition implies that the time step
 * must scale as the square of the spatial resolution.
 * Thus, the required number of time steps is prohibitively high for any explicit
 * time-stepping method, and we want to use an implicit stepper.
 * While more stable and less restrictive on the time step,
 * implicit methods come with the additional challenge of requiring a linear solver.
*/
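To make the time-step restriction in the comment above concrete, a rough CFL-style estimate reads (a sketch; the constants depend on the discretization and are not taken from the ASGarD sources):

\Delta t_{\text{advection}} \lesssim C_1 \, \Delta x, \qquad
\Delta t_{\text{diffusion}} \lesssim C_2 \, \Delta x^{2},

so halving the mesh spacing roughly quadruples the number of explicit steps needed for the diffusion operator, which is why this example defaults to an implicit stepper.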

/*!
@@ -97,12 +106,12 @@ asgard::PDEv2<P> make_diffusion_pde(int num_dims, asgard::prog_opts options) {
}

// only the iterative solvers will use these values
// jacobi is the fastest and (currently) most stable preconditioner
// jacobi is (currently) the fastest and most stable preconditioner
options.default_precon = asgard::preconditioner_opts::jacobi;
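For reference, the Jacobi preconditioner selected above replaces the system A x = b with M^{-1} A x = M^{-1} b, where M = diag(A), so applying it amounts to a component-wise division by the diagonal. A minimal sketch of the apply step (illustrative only; the ASGarD implementation stores and applies the diagonal differently):

#include <cstddef>
#include <vector>

// apply the Jacobi preconditioner: z = M^{-1} r with M = diag(A)
void apply_jacobi(std::vector<double> const &diag_of_A,
                  std::vector<double> const &r, std::vector<double> &z) {
  z.resize(r.size());
  for (std::size_t i = 0; i < r.size(); i++)
    z[i] = r[i] / diag_of_A[i];
}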

// the tolerance for the iterative solver should probably be updated
// based on the time-step and the max-level
// this is a tight number of remove the solver error from consideration
// this is a tight number which removes the solver error from consideration
options.default_isolver_tolerance = 1.E-7;

// the number of iterations should depend on the time-step and condition
@@ -111,7 +120,7 @@

// GMRES uses a two-loop approach (restarted GMRES)
// the inner iterations explicitly form and manipulate the basis for the Krylov sub-space
// this requires lots of memory and the number here should be kept moderate
// which requires lots of memory and the number here should be kept moderate
// (memory usage is dominated by isolver_inner_iterations * degrees-of-freedom)
options.isolver_inner_iterations = 50;
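A back-of-the-envelope check of the memory note above: restarted GMRES(m) stores an orthonormal Krylov basis of m + 1 vectors, each with one entry per degree of freedom, so the basis dominates the memory footprint. A small sketch of the estimate (the degree-of-freedom count is a placeholder, not an ASGarD default):

#include <cstdint>
#include <iostream>

int main() {
  int64_t const dof = 10000000; // degrees of freedom, example value
  int const inner   = 50;       // isolver_inner_iterations
  double const gb =
      static_cast<double>(inner + 1) * dof * sizeof(double) / (1024.0 * 1024.0 * 1024.0);
  std::cout << "Krylov basis storage: about " << gb << " GB\n";
  return 0;
}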

@@ -122,17 +131,18 @@

// one dimensional divergence term using upwind flux
// setting Dirichlet condition here will in fact yield Neumann boundary condition
// to the combined operator and respectively to the field
// for the combined operator and onto the field
asgard::term_1d<P> div = asgard::term_div(asgard::flux_type::upwind,
asgard::boundary_type::free,
P{-1});

// Dirichlet conditions applied to the grad term apply Dirichlet condition
// to the combined operator and respectively to the field
// for the combined operator and respectively the field
asgard::term_1d<P> grad = asgard::term_grad(asgard::flux_type::upwind,
asgard::boundary_type::dirichlet,
P{1});

// different way of operator chaining
if constexpr (chain1d)
{
// the second order operator is a chain of operators
@@ -349,7 +359,7 @@ int main(int argc, char** argv)

// the discretization_manager takes in a pde and handles sparse-grid construction
// separable and non-separable operators, holds the current state, etc.
asgard::discretization_manager<P> disc(make_diffusion_pde<P>(2, options),
asgard::discretization_manager<P> disc(make_diffusion_pde<P, false>(2, options),
asgard::verbosity_level::high);

// time-integration is performed using the advance_time() method
@@ -361,6 +371,17 @@

asgard::advance_time(disc); // integrate until num-steps or stop-time

// as an alternative to the one-shot approach above, integration can be done
// step-by-step and verbose output can be generated after each step
// in the code below, the built-in reporting mechanism is first set to quiet,
// and the error is computed after each time step
// disc.set_verbosity(asgard::verbosity_level::quiet);
// while (disc.time_params().num_remain() > 0) {
// asgard::advance_time(disc, 1);
// disc.progress_report();
// std::cout << " -- error: " << get_error_l2(disc) << "\n";
// }

disc.progress_report();

if (not disc.stop_verbosity())
@@ -441,6 +462,9 @@ void self_test() {
dotest<double, false>(1.E-4, 1, "-l 5 -n 20 -sv direct");
dotest<double, false>(5.E-5, 1, "-l 6 -n 20 -sv direct");

dotest<double>(1.E-4, 1, "-l 5 -n 10 -sv bicgstab"); // check the jacobi preconditioner
dotest<double, false>(1.E-4, 1, "-l 5 -n 10 -sv bicgstab"); // both chain-modes

dotest(1.E-1, 1, "-l 5 -d 0 -n 20 -sv direct");
dotest(5.E-3, 1, "-l 5 -d 1 -n 20 -sv direct");
dotest(5.E-5, 1, "-l 5 -d 2 -n 40 -dt 0.005 -sv direct"); // time error manifests here
@@ -453,7 +477,8 @@

// in the first few steps here, the grid is very coarse
// finer refinement and a smaller time-step are needed, so only the final error is OK
longtest(2.E-3, 2, "-l 2 -m 8 -a 1.E-3"); // adapt
longtest(2.E-3, 2, "-l 2 -m 8 -a 1.E-3");
longtest(2.E-3, 2, "-l 2 -m 8 -a 1.E-3 -sv direct"); // check if solver is updated
dotest(5.E-3, 2, "-l 8 -a 1.E-4");
#endif

