added gmres solver

project-asgard · Jan 23, 2025 · 19d8e8e · 19d8e8e
1 parent 3eb8401
commit 19d8e8e
Show file tree

Hide file tree

Showing 6 changed files with 306 additions and 2 deletions.
diff --git a/src/asgard_discretization.hpp b/src/asgard_discretization.hpp
@@ -167,6 +167,13 @@ class discretization_manager
     tools::time_event performance_("terms_apply_all kronmult");
     terms.apply_all(sgrid, conn, alpha, x, beta, y);
   }
+  //! applies all terms, non-owning array signature
+  void terms_apply_all(precision alpha, precision const x[], precision beta,
+                       precision y[]) const
+  {
+    tools::time_event performance_("terms_apply_all kronmult");
+    terms.apply_all(sgrid, conn, alpha, x, beta, y);
+  }
   //! applies ADI preconditioner for all terms
   void terms_apply_adi(std::vector<precision> const &x,
                        std::vector<precision> &y) const

diff --git a/src/asgard_solver.cpp b/src/asgard_solver.cpp
@@ -730,8 +730,94 @@ ASGARD_OMP_PARFOR_SIMD
   return num_appy;
 }
 
+template<typename P>
+int gmres<P>::solve(
+    operatoin_apply_precon_arr<P> apply_precon,
+    operatoin_apply_lhs_arr<P> apply_lhs, std::vector<P> const &rhs,
+    std::vector<P> &x) const
+{
+  int const n = static_cast<int>(rhs.size());
+  expect(n == static_cast<int>(x.size()));
+
+  basis.resize(static_cast<int64_t>(n) * (max_inner_ + 1));
+
+  int num_appy = 0;
+
+  // int total_iterations = 0;
+  int outer_iterations = 0;
+  int inner_iterations = 0;
+
+  P inner_res = 0.;
+  P outer_res = tolerance_ + 1.;
+  while ((outer_res > tolerance_) && (outer_iterations < max_outer_))
+  {
+    std::copy(rhs.begin(), rhs.end(), basis.begin());
+    apply_lhs(-1, x.data(), 1, basis.data());
+    apply_precon(basis.data());
+    ++num_appy;
+
+    inner_res = lib_dispatch::nrm2(n, basis.data(), 1);
+    lib_dispatch::scal(n, P{1} / inner_res, basis.data(), 1);
+    krylov_sol[0] = inner_res;
+
+    inner_iterations = 0;
+    while (inner_res > tolerance_ and inner_iterations < max_inner_)
+    {
+      P *r = basis.data() + static_cast<int64_t>(n) * (inner_iterations + 1);
+      apply_lhs(1, basis.data() + static_cast<int64_t>(n) * inner_iterations, 0, r);
+      apply_precon(r);
+      ++num_appy;
+
+      // krylov projection coefficients for this iteration
+      P *coeff = krylov_proj + (inner_iterations * (inner_iterations + 1)) / 2;
+
+      lib_dispatch::gemv('T', n, inner_iterations + 1, P{1}, basis.data(), n,
+                         r, 1, P{0}, coeff, 1);
+      lib_dispatch::gemv('N', n, inner_iterations + 1, P{-1}, basis.data(), n,
+                         coeff, 1, P{1}, r, 1);
+
+      P const nrm = lib_dispatch::nrm2(n, r, 1);
+      lib_dispatch::scal(n, P{1} / nrm, r, 1);
+      for (int k = 0; k < inner_iterations; k++)
+        lib_dispatch::rot(1, coeff + k, 1, coeff + k + 1, 1,
+                          cosines[k], sines[k]);
+
+      // compute given's rotation
+      P beta = nrm;
+      lib_dispatch::rotg(coeff + inner_iterations, &beta,
+                         cosines + inner_iterations,
+                         sines + inner_iterations);
+
+      inner_res =
+          std::abs(sines[inner_iterations] * krylov_sol[inner_iterations]);
+
+      if (inner_res > tolerance_ and inner_iterations < max_inner_)
+      {
+        krylov_sol[inner_iterations + 1] = 0.;
+        lib_dispatch::rot(1, krylov_sol + inner_iterations, 1,
+                          krylov_sol + inner_iterations + 1, 1,
+                          cosines[inner_iterations], sines[inner_iterations]);
+      }
+
+      ++inner_iterations;
+    } // end of inner iteration loop
+
+    if (inner_iterations > 0)
+    {
+      lib_dispatch::tpsv('U', 'N', 'N', inner_iterations, krylov_proj, krylov_sol, 1);
+      lib_dispatch::gemv('N', n, inner_iterations, P{1}, basis.data(), n,
+                         krylov_sol, 1, P{1}, x.data(), 1);
+    }
+    ++outer_iterations;
+    outer_res = inner_res;
+  } // end outer iteration
+
+  return num_appy;
+}
+
 #ifdef ASGARD_ENABLE_DOUBLE
 template class direct<double>;
+template class bicgstab<double>;
 
 template gmres_info<double>
 simple_gmres(fk::matrix<double> const &A, fk::vector<double> &x,
@@ -780,6 +866,7 @@ template void poisson_data<double>::solve(
 
 #ifdef ASGARD_ENABLE_FLOAT
 template class direct<float>;
+template class bicgstab<float>;
 
 template gmres_info<float>
 simple_gmres(fk::matrix<float> const &A, fk::vector<float> &x,
@@ -864,6 +951,13 @@ void solver_manager<P>::print_opts(std::ostream &os) const
       os << "  max iterations: " << std::get<solvers::bicgstab<P>>(var).max_iter() << '\n';
       has_precon = true;
       break;
+    case 2:
+      os << "  gmres\n";
+      os << "  tolerance: " << std::get<solvers::gmres<P>>(var).tolerance() << '\n';
+      os << "  max inner: " << std::get<solvers::gmres<P>>(var).max_inner() << '\n';
+      os << "  max outer: " << std::get<solvers::gmres<P>>(var).max_outer() << '\n';
+      has_precon = true;
+      break;
     default:
       break;
   }

diff --git a/src/asgard_solver.hpp b/src/asgard_solver.hpp
@@ -176,6 +176,18 @@ template<typename P>
 using operatoin_apply_lhs =
   std::function<void(P alpha, std::vector<P> const &x, P beta, std::vector<P> &y)>;
 
+/*!
+ * \internal
+ * \brief Signature for the left-hand linear operation for a solver, raw-array variant
+ *
+ * Computes `y = alpha * A * x + beta * y`
+ *
+ * \endinternal
+ */
+template<typename P>
+using operatoin_apply_lhs_arr =
+  std::function<void(P alpha, P const x[], P beta, P y[])>;
+
 /*!
  * \internal
  * \brief Signature for the preconditioner
@@ -188,6 +200,18 @@ template<typename P>
 using operatoin_apply_precon =
   std::function<void(std::vector<P> const &x, std::vector<P> &y)>;
 
+/*!
+ * \internal
+ * \brief Signature for the preconditioner
+ *
+ * Computes `y = inverse-P * y` and do not need the constants
+ * from the asgard::operatoin_apply_lhs
+ *
+ * \endinternal
+ */
+template<typename P>
+using operatoin_apply_precon_arr = std::function<void(P y[])>;
+
 /*!
  * \internal
  * \brief BiCGSTAB method combines Conjugate-Gradient and GMRES
@@ -227,6 +251,61 @@ class bicgstab
   mutable std::vector<P> rref, r, p, v, t;
 };
 
+/*!
+ * \internal
+ * \brief General Minimum Residual solver - GMRES
+ *
+ * Implements the restarted version with given max-number of inner and outer
+ * iterations. The class mostly holds workspace vectors.
+ * \endinternal
+ */
+template<typename P>
+class gmres
+{
+public:
+  //! default constructor, nothing to do
+  gmres() = default;
+
+  //! construct and set the tolerance and maximum number of iterations
+  gmres(P tol, int maxi, int maxo)
+    : tolerance_(tol), max_inner_(maxi), max_outer_(maxo)
+  {
+    krylov_data.resize(3 * (max_inner_ + 1) + ((max_inner_ + 1) * max_inner_) / 2);
+
+    P *data = krylov_data.data();
+    krylov_proj = std::exchange(data, data + ((max_inner_ + 1) * max_inner_ / 2));
+    sines       = std::exchange(data, data + max_inner_ + 1);
+    cosines     = std::exchange(data, data + max_inner_ + 1);
+    krylov_sol  = std::exchange(data, data + max_inner_ + 1);
+    expect(data == krylov_data.data() + krylov_data.size());
+  }
+
+  //! solve for the given linear operators, right-hand-side and initial iterate
+  int solve(operatoin_apply_precon_arr<P> apply_precon,
+            operatoin_apply_lhs_arr<P> apply_lhs, std::vector<P> const &rhs,
+            std::vector<P> &x) const;
+
+  //! returns the set tolerance
+  P tolerance() const { return tolerance_; }
+  //! returns the set max-number of iterations
+  int max_inner() const { return max_inner_; }
+  //! returns the max-number of restarts
+  int max_outer() const { return max_outer_; }
+
+private:
+  P tolerance_   = 0;
+  int max_inner_ = 0;
+  int max_outer_ = 0;
+
+  mutable std::vector<P> basis;
+
+  mutable std::vector<P> krylov_data;
+  mutable P *krylov_proj = nullptr;
+  mutable P *sines       = nullptr;
+  mutable P *cosines     = nullptr;
+  mutable P *krylov_sol  = nullptr;
+};
+
 } // namespace asgard::solvers
 
 namespace asgard
@@ -263,6 +342,18 @@ struct solver_manager
                                    options.isolver_iterations.value());
         precon = options.precon.value_or(preconditioner_opts::none);
         break;
+      case solve_opts::gmres:
+        rassert(options.isolver_tolerance,
+                "missing tolerance for the iterative solver gmres");
+        rassert(options.isolver_iterations,
+                "missing number of iterations for the iterative solver gmres");
+        rassert(options.isolver_outer_iterations,
+                "missing number of outer iterations for the iterative solver gmres");
+        var = solvers::gmres<P>(options.isolver_tolerance.value(),
+                                options.isolver_iterations.value(),
+                                options.isolver_outer_iterations.value());
+        precon = options.precon.value_or(preconditioner_opts::none);
+        break;
       default: // unreachable
         break;
     }
@@ -315,6 +406,29 @@ struct solver_manager
     }
   }
 
+  //! iterative solver, calls the appropriate iterative solver
+  void iterate_solve(solvers::operatoin_apply_lhs_arr<P> apply_lhs,
+                     std::vector<P> const &rhs, std::vector<P> &x) const
+  {
+    iterate_solve(nullptr, apply_lhs, rhs, x);
+  }
+
+  //! iterative solver, calls the appropriate iterative solver
+  void iterate_solve(solvers::operatoin_apply_precon_arr<P> precon,
+                     solvers::operatoin_apply_lhs_arr<P> apply_lhs,
+                     std::vector<P> const &rhs, std::vector<P> &x) const
+  {
+    expect(opt == solve_opts::gmres);
+    if (precon) {
+      solvers::gmres<P> const &gmres = std::get<solvers::gmres<P>>(var);
+
+      num_apply += gmres.solve(precon, apply_lhs, rhs, x);
+    } else {
+      num_apply += std::get<solvers::gmres<P>>(var).solve(
+        [](P *)->void{ /* no preconditioner */ }, apply_lhs, rhs, x);
+    }
+  }
+
   //! updates the internals for the current grid generation
   void update_grid(sparse_grid const &grid,
                    connection_patterns const &conn,
@@ -332,7 +446,7 @@ struct solver_manager
   //! remembers the generation of the grid that was used to last set the manager
   int grid_gen = -1;
   //! holds the actual solver instance
-  std::variant<solvers::direct<P>, solvers::bicgstab<P>> var;
+  std::variant<solvers::direct<P>, solvers::bicgstab<P>, solvers::gmres<P>> var;
   //! holds data for the jacobi preconditioner
   std::vector<P> jacobi;
 };

diff --git a/src/asgard_term_manager.cpp b/src/asgard_term_manager.cpp
@@ -335,6 +335,36 @@ void term_manager<P>::apply_all(
     b = 1; // next iteration appends on y
   }
 }
+template<typename P>
+void term_manager<P>::apply_all(
+    sparse_grid const &grid, connection_patterns const &conns,
+    P alpha, P const x[], P beta, P y[]) const
+{
+  P b = beta; // on first iteration, overwrite y
+
+  auto it = terms.begin();
+  while (it < terms.end())
+  {
+    if (it->num_chain == 1) {
+      kron_term(grid, conns, *it, alpha, x, b, y);
+      ++it;
+    } else {
+      // dealing with a chain
+      int const num_chain = it->num_chain;
+
+      kron_term(grid, conns, *(it + num_chain - 1), 1, x, 0, t1.data());
+      for (int i = num_chain - 2; i > 0; --i) {
+        kron_term(grid, conns, *(it + i), 1, t1, 0, t2);
+        std::swap(t1, t2);
+      }
+      kron_term(grid, conns, *it, alpha, t1.data(), b, y);
+
+      it += it->num_chain;
+    }
+
+    b = 1; // next iteration appends on y
+  }
+}
 
 template<typename P>
 void term_manager<P>::apply_all_adi(

diff --git a/src/asgard_term_manager.hpp b/src/asgard_term_manager.hpp
@@ -86,6 +86,9 @@ struct term_manager
   //! y = sum(terms * x), applies all terms
   void apply_all(sparse_grid const &grid, connection_patterns const &conns,
                  P alpha, std::vector<P> const &x, P beta, std::vector<P> &y) const;
+  //! y = sum(terms * x), applies all terms
+  void apply_all(sparse_grid const &grid, connection_patterns const &conns,
+                 P alpha, P const x[], P beta, P y[]) const;
 
   //! y = prod(terms_adi * x), applies the ADI preconditioning to all terms
   void apply_all_adi(sparse_grid const &grid, connection_patterns const &conns,
@@ -103,6 +106,13 @@ struct term_manager
     block_cpu(legendre.pdof, grid, conns, tme.perm, tme.coeffs,
               alpha, x.data(), beta, y.data(), kwork);
   }
+  //! y = alpha * tme * x + beta * y, assumes workspace has been set and x/y have proper size
+  void kron_term(sparse_grid const &grid, connection_patterns const &conns,
+                 term_entry<P> const &tme, P alpha, P const x[], P beta, P y[]) const
+  {
+    block_cpu(legendre.pdof, grid, conns, tme.perm, tme.coeffs,
+              alpha, x, beta, y, kwork);
+  }
   void kron_term_adi(sparse_grid const &grid, connection_patterns const &conns,
                      term_entry<P> const &tme, P alpha, std::vector<P> const &x, P beta,
                      std::vector<P> &y) const