
Commit

Add parallel GMRES implementation using existing distribution of matrix abstraction.

Only tested on continuity_1, on CPU, with 4 processes. Need to try more PDEs with
different numbers of processes.

Haven't tried with GPUs. I think this will be easiest with CUDA-aware MPI.

Kept existing serial implementation to avoid additional copies. We may want to
revisit this soon.

Signed-off-by: Steven Hahn <hahnse@ornl.gov>
quantumsteve committed Jan 25, 2024
1 parent 39f1cf8 commit 8f11cea
Showing 10 changed files with 378 additions and 21 deletions.
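
The core primitive this commit adds is a scalar overload of reduce_results (see src/distribution.cpp below) that allreduces a single value across the ranks sharing a subgrid row. A minimal sketch of how a distributed inner product for GMRES could be built on top of it; the distributed_dot helper, the include path, and the enclosing namespace are assumptions for illustration, not part of the commit:

// Sketch only, not part of the commit: a row-distributed inner product that
// could serve GMRES orthogonalization. The include path, namespace, and
// fk::vector accessors below are assumed from their use elsewhere in asgard.
#include "distribution.hpp"

namespace asgard
{
template<typename P>
P distributed_dot(fk::vector<P> const &x, fk::vector<P> const &y,
                  distribution_plan const &plan, int const my_rank)
{
  // each rank forms the dot product over its locally owned segment
  P local = 0;
  P const *const xd = x.data();
  P const *const yd = y.data();
  for (int i = 0; i < x.size(); ++i)
    local += xd[i] * yd[i];

  // the new scalar overload sums the partial values across the subgrid row
  P global = 0;
  reduce_results(local, global, plan, my_rank);
  return global;
}
} // namespace asgard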
4 changes: 2 additions & 2 deletions src/asgard_resources.hpp
@@ -14,7 +14,7 @@
namespace asgard
{
// used to suppress warnings in unused variables
auto const ignore = [](auto ignored) { (void)ignored; };
auto const ignore = [](auto const &ignored) { (void)ignored; };

/*!
* \brief Default precision to use, double if enabled and float otherwise.
@@ -115,4 +115,4 @@ void memcpy_2d(P *dest, int const dest_stride, P const *const source,
#include "asgard_resources_cuda.tpp"
#else
#include "asgard_resources_host.tpp"
#endif
#endif
71 changes: 70 additions & 1 deletion src/distribution.cpp
@@ -315,7 +315,7 @@ void reduce_results(fk::vector<P, src_mem, resrc> const &source,
MPI_Comm_split(global_communicator, my_row, my_col, &row_communicator);
expect(success == 0);

MPI_Datatype const mpi_type =
MPI_Datatype constexpr mpi_type =
std::is_same_v<P, double> ? MPI_DOUBLE : MPI_FLOAT;
success = MPI_Allreduce((void *)source.data(), (void *)dest.data(),
source.size(), mpi_type, MPI_SUM, row_communicator);
@@ -330,6 +330,49 @@ void reduce_results(fk::vector<P, src_mem, resrc> const &source,
#endif
}

template<typename P, std::enable_if_t<std::is_floating_point<P>::value, bool> = true>
void reduce_results(P const source,
P &dest,
distribution_plan const &plan, int const my_rank)
{
expect(my_rank >= 0);
expect(my_rank < static_cast<int>(plan.size()));

#ifdef ASGARD_USE_MPI
if (plan.size() == 1)
{
dest = source;
return;
}

dest = 0.;
int const num_cols = get_num_subgrid_cols(plan.size());

int const my_row = my_rank / num_cols;
int const my_col = my_rank % num_cols;

MPI_Comm row_communicator;
MPI_Comm const global_communicator = distro_handle.get_global_comm();

auto success =
MPI_Comm_split(global_communicator, my_row, my_col, &row_communicator);
expect(success == 0);

MPI_Datatype constexpr mpi_type =
std::is_same_v<P, double> ? MPI_DOUBLE : MPI_FLOAT;
success = MPI_Allreduce((void *)&source, (void *)&dest,
1, mpi_type, MPI_SUM, row_communicator);
expect(success == 0);

success = MPI_Comm_free(&row_communicator);
expect(success == 0);

#else
dest = source;
return;
#endif
}

//
// -- below functionality for exchanging solution vector data across subgrid
// rows via point-to-point messages.
@@ -1170,16 +1213,34 @@ void scatter_matrix(P const *A, int const *descA, P *A_distr,

#ifdef ASGARD_ENABLE_DOUBLE

template void reduce_results(double const source, double &dest,
distribution_plan const &plan, int const my_rank);

template void reduce_results(
fk::vector<double, mem_type::owner, resource::host> const &source,
fk::vector<double, mem_type::owner, resource::host> &dest,
distribution_plan const &plan, int const my_rank);

template void reduce_results(
fk::vector<double, mem_type::owner, resource::host> const &source,
fk::vector<double, mem_type::view, resource::host> &dest,
distribution_plan const &plan, int const my_rank);

template void exchange_results(
fk::vector<double, mem_type::owner, resource::host> const &source,
fk::vector<double, mem_type::owner, resource::host> &dest,
int const segment_size, distribution_plan const &plan, int const my_rank);

template void exchange_results(
fk::vector<double, mem_type::view, resource::host> const &source,
fk::vector<double, mem_type::owner, resource::host> &dest,
int const segment_size, distribution_plan const &plan, int const my_rank);

template void exchange_results(
fk::vector<double, mem_type::owner, resource::host> const &source,
fk::vector<double, mem_type::view, resource::host> &dest,
int const segment_size, distribution_plan const &plan, int const my_rank);

#ifdef ASGARD_USE_CUDA
template void reduce_results(
fk::vector<double, mem_type::owner, resource::device> const &source,
@@ -1220,11 +1281,19 @@ template void scatter_matrix<double>(double const *A, int const *descA,

#ifdef ASGARD_ENABLE_FLOAT

template void reduce_results(float const source, float &dest,
distribution_plan const &plan, int const my_rank);

template void
reduce_results(fk::vector<float, mem_type::owner, resource::host> const &source,
fk::vector<float, mem_type::owner, resource::host> &dest,
distribution_plan const &plan, int const my_rank);

template void
reduce_results(fk::vector<float, mem_type::owner, resource::host> const &source,
fk::vector<float, mem_type::view, resource::host> &dest,
distribution_plan const &plan, int const my_rank);

template void exchange_results(
fk::vector<float, mem_type::owner, resource::host> const &source,
fk::vector<float, mem_type::owner, resource::host> &dest,
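
The scalar reduce_results added above splits the global communicator by subgrid row and allreduces one value across that row. The same MPI pattern, shown standalone and independent of asgard's types (the two-column subgrid layout is an assumption for illustration; run with, e.g., 4 ranks):

// Standalone sketch of the comm-split + allreduce pattern, not part of the
// commit. Compile with an MPI wrapper (e.g. mpicxx) and run under mpirun.
#include <cstdio>
#include <mpi.h>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int my_rank = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

  // assume two subgrid columns, so consecutive pairs of ranks share a row
  int const num_cols = 2;
  int const my_row = my_rank / num_cols;
  int const my_col = my_rank % num_cols;

  // group the ranks of one subgrid row into their own communicator
  MPI_Comm row_comm;
  MPI_Comm_split(MPI_COMM_WORLD, my_row, my_col, &row_comm);

  // every rank contributes one scalar; the row communicator sums them
  double const local = static_cast<double>(my_rank);
  double summed = 0.0;
  MPI_Allreduce(&local, &summed, 1, MPI_DOUBLE, MPI_SUM, row_comm);

  std::printf("rank %d (row %d): row sum = %f\n", my_rank, my_row, summed);

  MPI_Comm_free(&row_comm);
  MPI_Finalize();
  return 0;
}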
4 changes: 4 additions & 0 deletions src/distribution.hpp
@@ -238,6 +238,10 @@ template<typename P, mem_type src_mem, mem_type dst_mem, resource resrc>
void reduce_results(fk::vector<P, src_mem, resrc> const &source,
fk::vector<P, dst_mem, resrc> &dest,
distribution_plan const &plan, int const my_rank);
template<typename P, std::enable_if_t<std::is_floating_point<P>::value, bool> = true>
void reduce_results(P const source,
P &dest,
distribution_plan const &plan, int const my_rank);

// generate a message list for each rank for exchange_results function;
// conceptually an internal component function, exposed for testing
78 changes: 78 additions & 0 deletions src/distribution_tests.cpp
@@ -453,6 +453,84 @@ TEMPLATE_TEST_CASE("allreduce across row of subgrids", "[distribution]",
}
}

TEMPLATE_TEST_CASE("allreduce element across row", "[distribution]",
test_precs)
{
if (!is_active())
{
return;
}

SECTION("1 rank")
{
auto const num_ranks = 1;
auto const my_rank = 0;
int const degree = 4;
int const level = 2;

options const o = make_options(
{"-l", std::to_string(level), "-d", std::to_string(degree)});
auto const pde =
make_PDE<default_precision>(PDE_opts::continuity_2, level, degree);
elements::table const table(o, *pde);

auto const plan = get_plan(num_ranks, table);

TestType const gold{42};
TestType const x = gold;
TestType fx = 0.;
reduce_results(x, fx, plan, my_rank);
REQUIRE(fx == gold);
}

SECTION("multiple ranks")
{
#ifdef ASGARD_USE_MPI

int const my_rank = distrib_test_info.get_my_rank();
int const num_ranks = distrib_test_info.get_num_ranks();

if (my_rank < num_ranks)
{
int const degree = 5;
int const level = 4;

options const o = make_options(
{"-l", std::to_string(level), "-d", std::to_string(degree)});

auto const pde =
make_PDE<default_precision>(PDE_opts::continuity_3, level, degree);
elements::table const table(o, *pde);

auto const plan = get_plan(num_ranks, table);

std::vector<TestType> rank_outputs;
for (int i = 0; i < static_cast<int>(plan.size()); ++i)
{
rank_outputs.push_back(i);
}
int const my_row = my_rank / get_num_subgrid_cols(num_ranks);
TestType gold = 0;
for (int i = 0; i < static_cast<int>(rank_outputs.size()); ++i)
{
if (i / get_num_subgrid_cols(num_ranks) == my_row)
{
gold = gold + rank_outputs[i];
}
}

auto const &x = rank_outputs[std::min(my_rank, static_cast<int>(plan.size()) - 1)];
TestType fx = 0.;
reduce_results(x, fx, plan, my_rank);
REQUIRE_THAT(fx, Catch::Matchers::WithinAbs(gold, std::numeric_limits<TestType>::epsilon()));
}
#else
REQUIRE(true);
#endif
}
}

void generate_messages_test(int const num_ranks, elements::table const &table)
{
auto const plan = get_plan(num_ranks, table);
2 changes: 1 addition & 1 deletion src/program_options.cpp
@@ -308,7 +308,7 @@ parser::parser(int argc, char const *const *argv)
if ((use_implicit_stepping || use_imex_stepping) && get_num_ranks() > 1)
{
auto const choice = solver_mapping.at(solver_str);
if (choice != solve_opts::scalapack)
if (choice == solve_opts::direct)
{
std::cerr << "Distribution not implemented for implicit stepping\n";
valid = false;