Skip to content

Commit

Permalink
added gmres solver
Browse files Browse the repository at this point in the history
  • Loading branch information
mkstoyanov committed Jan 23, 2025
1 parent 3eb8401 commit 19d8e8e
Show file tree
Hide file tree
Showing 6 changed files with 306 additions and 2 deletions.
7 changes: 7 additions & 0 deletions src/asgard_discretization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,13 @@ class discretization_manager
tools::time_event performance_("terms_apply_all kronmult");
terms.apply_all(sgrid, conn, alpha, x, beta, y);
}
//! applies all terms, non-owning array signature
void terms_apply_all(precision alpha, precision const x[], precision beta,
precision y[]) const
{
tools::time_event performance_("terms_apply_all kronmult");
terms.apply_all(sgrid, conn, alpha, x, beta, y);
}
//! applies ADI preconditioner for all terms
void terms_apply_adi(std::vector<precision> const &x,
std::vector<precision> &y) const
Expand Down
94 changes: 94 additions & 0 deletions src/asgard_solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -730,8 +730,94 @@ ASGARD_OMP_PARFOR_SIMD
return num_appy;
}

template<typename P>
int gmres<P>::solve(
operatoin_apply_precon_arr<P> apply_precon,
operatoin_apply_lhs_arr<P> apply_lhs, std::vector<P> const &rhs,
std::vector<P> &x) const
{
int const n = static_cast<int>(rhs.size());
expect(n == static_cast<int>(x.size()));

basis.resize(static_cast<int64_t>(n) * (max_inner_ + 1));

int num_appy = 0;

// int total_iterations = 0;
int outer_iterations = 0;
int inner_iterations = 0;

P inner_res = 0.;
P outer_res = tolerance_ + 1.;
while ((outer_res > tolerance_) && (outer_iterations < max_outer_))
{
std::copy(rhs.begin(), rhs.end(), basis.begin());
apply_lhs(-1, x.data(), 1, basis.data());
apply_precon(basis.data());
++num_appy;

inner_res = lib_dispatch::nrm2(n, basis.data(), 1);
lib_dispatch::scal(n, P{1} / inner_res, basis.data(), 1);
krylov_sol[0] = inner_res;

inner_iterations = 0;
while (inner_res > tolerance_ and inner_iterations < max_inner_)
{
P *r = basis.data() + static_cast<int64_t>(n) * (inner_iterations + 1);
apply_lhs(1, basis.data() + static_cast<int64_t>(n) * inner_iterations, 0, r);
apply_precon(r);
++num_appy;

// krylov projection coefficients for this iteration
P *coeff = krylov_proj + (inner_iterations * (inner_iterations + 1)) / 2;

lib_dispatch::gemv('T', n, inner_iterations + 1, P{1}, basis.data(), n,
r, 1, P{0}, coeff, 1);
lib_dispatch::gemv('N', n, inner_iterations + 1, P{-1}, basis.data(), n,
coeff, 1, P{1}, r, 1);

P const nrm = lib_dispatch::nrm2(n, r, 1);
lib_dispatch::scal(n, P{1} / nrm, r, 1);
for (int k = 0; k < inner_iterations; k++)
lib_dispatch::rot(1, coeff + k, 1, coeff + k + 1, 1,
cosines[k], sines[k]);

// compute given's rotation
P beta = nrm;
lib_dispatch::rotg(coeff + inner_iterations, &beta,
cosines + inner_iterations,
sines + inner_iterations);

inner_res =
std::abs(sines[inner_iterations] * krylov_sol[inner_iterations]);

if (inner_res > tolerance_ and inner_iterations < max_inner_)
{
krylov_sol[inner_iterations + 1] = 0.;
lib_dispatch::rot(1, krylov_sol + inner_iterations, 1,
krylov_sol + inner_iterations + 1, 1,
cosines[inner_iterations], sines[inner_iterations]);
}

++inner_iterations;
} // end of inner iteration loop

if (inner_iterations > 0)
{
lib_dispatch::tpsv('U', 'N', 'N', inner_iterations, krylov_proj, krylov_sol, 1);
lib_dispatch::gemv('N', n, inner_iterations, P{1}, basis.data(), n,
krylov_sol, 1, P{1}, x.data(), 1);
}
++outer_iterations;
outer_res = inner_res;
} // end outer iteration

return num_appy;
}

#ifdef ASGARD_ENABLE_DOUBLE
template class direct<double>;
template class bicgstab<double>;

template gmres_info<double>
simple_gmres(fk::matrix<double> const &A, fk::vector<double> &x,
Expand Down Expand Up @@ -780,6 +866,7 @@ template void poisson_data<double>::solve(

#ifdef ASGARD_ENABLE_FLOAT
template class direct<float>;
template class bicgstab<float>;

template gmres_info<float>
simple_gmres(fk::matrix<float> const &A, fk::vector<float> &x,
Expand Down Expand Up @@ -864,6 +951,13 @@ void solver_manager<P>::print_opts(std::ostream &os) const
os << " max iterations: " << std::get<solvers::bicgstab<P>>(var).max_iter() << '\n';
has_precon = true;
break;
case 2:
os << " gmres\n";
os << " tolerance: " << std::get<solvers::gmres<P>>(var).tolerance() << '\n';
os << " max inner: " << std::get<solvers::gmres<P>>(var).max_inner() << '\n';
os << " max outer: " << std::get<solvers::gmres<P>>(var).max_outer() << '\n';
has_precon = true;
break;
default:
break;
}
Expand Down
116 changes: 115 additions & 1 deletion src/asgard_solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,18 @@ template<typename P>
using operatoin_apply_lhs =
std::function<void(P alpha, std::vector<P> const &x, P beta, std::vector<P> &y)>;

/*!
* \internal
* \brief Signature for the left-hand linear operation for a solver, raw-array variant
*
* Computes `y = alpha * A * x + beta * y`
*
* \endinternal
*/
template<typename P>
using operatoin_apply_lhs_arr =
std::function<void(P alpha, P const x[], P beta, P y[])>;

/*!
* \internal
* \brief Signature for the preconditioner
Expand All @@ -188,6 +200,18 @@ template<typename P>
using operatoin_apply_precon =
std::function<void(std::vector<P> const &x, std::vector<P> &y)>;

/*!
* \internal
* \brief Signature for the preconditioner
*
* Computes `y = inverse-P * y` and do not need the constants
* from the asgard::operatoin_apply_lhs
*
* \endinternal
*/
template<typename P>
using operatoin_apply_precon_arr = std::function<void(P y[])>;

/*!
* \internal
* \brief BiCGSTAB method combines Conjugate-Gradient and GMRES
Expand Down Expand Up @@ -227,6 +251,61 @@ class bicgstab
mutable std::vector<P> rref, r, p, v, t;
};

/*!
* \internal
* \brief General Minimum Residual solver - GMRES
*
* Implements the restarted version with given max-number of inner and outer
* iterations. The class mostly holds workspace vectors.
* \endinternal
*/
template<typename P>
class gmres
{
public:
//! default constructor, nothing to do
gmres() = default;

//! construct and set the tolerance and maximum number of iterations
gmres(P tol, int maxi, int maxo)
: tolerance_(tol), max_inner_(maxi), max_outer_(maxo)
{
krylov_data.resize(3 * (max_inner_ + 1) + ((max_inner_ + 1) * max_inner_) / 2);

P *data = krylov_data.data();
krylov_proj = std::exchange(data, data + ((max_inner_ + 1) * max_inner_ / 2));
sines = std::exchange(data, data + max_inner_ + 1);
cosines = std::exchange(data, data + max_inner_ + 1);
krylov_sol = std::exchange(data, data + max_inner_ + 1);
expect(data == krylov_data.data() + krylov_data.size());
}

//! solve for the given linear operators, right-hand-side and initial iterate
int solve(operatoin_apply_precon_arr<P> apply_precon,
operatoin_apply_lhs_arr<P> apply_lhs, std::vector<P> const &rhs,
std::vector<P> &x) const;

//! returns the set tolerance
P tolerance() const { return tolerance_; }
//! returns the set max-number of iterations
int max_inner() const { return max_inner_; }
//! returns the max-number of restarts
int max_outer() const { return max_outer_; }

private:
P tolerance_ = 0;
int max_inner_ = 0;
int max_outer_ = 0;

mutable std::vector<P> basis;

mutable std::vector<P> krylov_data;
mutable P *krylov_proj = nullptr;
mutable P *sines = nullptr;
mutable P *cosines = nullptr;
mutable P *krylov_sol = nullptr;
};

} // namespace asgard::solvers

namespace asgard
Expand Down Expand Up @@ -263,6 +342,18 @@ struct solver_manager
options.isolver_iterations.value());
precon = options.precon.value_or(preconditioner_opts::none);
break;
case solve_opts::gmres:
rassert(options.isolver_tolerance,
"missing tolerance for the iterative solver gmres");
rassert(options.isolver_iterations,
"missing number of iterations for the iterative solver gmres");
rassert(options.isolver_outer_iterations,
"missing number of outer iterations for the iterative solver gmres");
var = solvers::gmres<P>(options.isolver_tolerance.value(),
options.isolver_iterations.value(),
options.isolver_outer_iterations.value());
precon = options.precon.value_or(preconditioner_opts::none);
break;
default: // unreachable
break;
}
Expand Down Expand Up @@ -315,6 +406,29 @@ struct solver_manager
}
}

//! iterative solver, calls the appropriate iterative solver
void iterate_solve(solvers::operatoin_apply_lhs_arr<P> apply_lhs,
std::vector<P> const &rhs, std::vector<P> &x) const
{
iterate_solve(nullptr, apply_lhs, rhs, x);
}

//! iterative solver, calls the appropriate iterative solver
void iterate_solve(solvers::operatoin_apply_precon_arr<P> precon,
solvers::operatoin_apply_lhs_arr<P> apply_lhs,
std::vector<P> const &rhs, std::vector<P> &x) const
{
expect(opt == solve_opts::gmres);
if (precon) {
solvers::gmres<P> const &gmres = std::get<solvers::gmres<P>>(var);

num_apply += gmres.solve(precon, apply_lhs, rhs, x);
} else {
num_apply += std::get<solvers::gmres<P>>(var).solve(
[](P *)->void{ /* no preconditioner */ }, apply_lhs, rhs, x);
}
}

//! updates the internals for the current grid generation
void update_grid(sparse_grid const &grid,
connection_patterns const &conn,
Expand All @@ -332,7 +446,7 @@ struct solver_manager
//! remembers the generation of the grid that was used to last set the manager
int grid_gen = -1;
//! holds the actual solver instance
std::variant<solvers::direct<P>, solvers::bicgstab<P>> var;
std::variant<solvers::direct<P>, solvers::bicgstab<P>, solvers::gmres<P>> var;
//! holds data for the jacobi preconditioner
std::vector<P> jacobi;
};
Expand Down
30 changes: 30 additions & 0 deletions src/asgard_term_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,36 @@ void term_manager<P>::apply_all(
b = 1; // next iteration appends on y
}
}
template<typename P>
void term_manager<P>::apply_all(
sparse_grid const &grid, connection_patterns const &conns,
P alpha, P const x[], P beta, P y[]) const
{
P b = beta; // on first iteration, overwrite y

auto it = terms.begin();
while (it < terms.end())
{
if (it->num_chain == 1) {
kron_term(grid, conns, *it, alpha, x, b, y);
++it;
} else {
// dealing with a chain
int const num_chain = it->num_chain;

kron_term(grid, conns, *(it + num_chain - 1), 1, x, 0, t1.data());
for (int i = num_chain - 2; i > 0; --i) {
kron_term(grid, conns, *(it + i), 1, t1, 0, t2);
std::swap(t1, t2);
}
kron_term(grid, conns, *it, alpha, t1.data(), b, y);

it += it->num_chain;
}

b = 1; // next iteration appends on y
}
}

template<typename P>
void term_manager<P>::apply_all_adi(
Expand Down
10 changes: 10 additions & 0 deletions src/asgard_term_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ struct term_manager
//! y = sum(terms * x), applies all terms
void apply_all(sparse_grid const &grid, connection_patterns const &conns,
P alpha, std::vector<P> const &x, P beta, std::vector<P> &y) const;
//! y = sum(terms * x), applies all terms
void apply_all(sparse_grid const &grid, connection_patterns const &conns,
P alpha, P const x[], P beta, P y[]) const;

//! y = prod(terms_adi * x), applies the ADI preconditioning to all terms
void apply_all_adi(sparse_grid const &grid, connection_patterns const &conns,
Expand All @@ -103,6 +106,13 @@ struct term_manager
block_cpu(legendre.pdof, grid, conns, tme.perm, tme.coeffs,
alpha, x.data(), beta, y.data(), kwork);
}
//! y = alpha * tme * x + beta * y, assumes workspace has been set and x/y have proper size
void kron_term(sparse_grid const &grid, connection_patterns const &conns,
term_entry<P> const &tme, P alpha, P const x[], P beta, P y[]) const
{
block_cpu(legendre.pdof, grid, conns, tme.perm, tme.coeffs,
alpha, x, beta, y, kwork);
}
void kron_term_adi(sparse_grid const &grid, connection_patterns const &conns,
term_entry<P> const &tme, P alpha, std::vector<P> const &x, P beta,
std::vector<P> &y) const
Expand Down
Loading

0 comments on commit 19d8e8e

Please sign in to comment.