diff --git a/src/muscl/HydroRunFunctors2D.h b/src/muscl/HydroRunFunctors2D.h index 0c9e831..5a0986c 100644 --- a/src/muscl/HydroRunFunctors2D.h +++ b/src/muscl/HydroRunFunctors2D.h @@ -1,11 +1,6 @@ #ifndef HYDRO_RUN_FUNCTORS_2D_H_ #define HYDRO_RUN_FUNCTORS_2D_H_ -#include // for std::numeric_limits -#ifdef __CUDA_ARCH__ -# include // for cuda math constants, e.g. CUDART_INF -#endif // __CUDA_ARCH__ - #include "shared/kokkos_shared.h" #include "HydroBaseFunctor2D.h" #include "shared/RiemannSolvers.h" @@ -34,43 +29,29 @@ class ComputeDtFunctor2D : public HydroBaseFunctor2D // static method which does it all: create and execute functor static void - apply(HydroParams params, DataArray2d Udata, int nbCells, real_t & invDt) + apply(HydroParams params, DataArray2d Udata, real_t & invDt) { - ComputeDtFunctor2D functor(params, Udata); - Kokkos::parallel_reduce(nbCells, functor, invDt); + ComputeDtFunctor2D functor(params, Udata); + Kokkos::Max reducer(invDt); + Kokkos::parallel_reduce( + "ComputeDtFunctor2D", + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), + functor, + reducer); } - // Tell each thread how to initialize its reduction result. - KOKKOS_INLINE_FUNCTION - void - init(real_t & dst) const - { - // The identity under max is -Inf. - // Kokkos does not come with a portable way to access - // floating-point Inf and NaN. -#ifdef __CUDA_ARCH__ - dst = -CUDART_INF; -#else - dst = std::numeric_limits::min(); -#endif // __CUDA_ARCH__ - } // init - /* this is a reduce (max) functor */ KOKKOS_INLINE_FUNCTION void - operator()(const int & index, real_t & invDt) const + operator()(const int & i, const int & j, real_t & invDt) const { - const int isize = params.isize; - const int jsize = params.jsize; - const int ghostWidth = params.ghostWidth; - // const int nbvar = params.nbvar; + const int isize = params.isize; + const int jsize = params.jsize; + const int ghostWidth = params.ghostWidth; const real_t dx = params.dx; const real_t dy = params.dy; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j < jsize - ghostWidth && i >= ghostWidth && i < isize - ghostWidth) + if (j >= ghostWidth and j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { HydroState uLoc; // conservative variables in current cell @@ -94,28 +75,6 @@ class ComputeDtFunctor2D : public HydroBaseFunctor2D } // operator () - - // "Join" intermediate results from different threads. - // This should normally implement the same reduction - // operation as operator() above. Note that both input - // arguments MUST be declared volatile. - KOKKOS_INLINE_FUNCTION -#if KOKKOS_VERSION_MAJOR > 3 - void - join(real_t & dst, const real_t & src) const -#else - void - join(volatile real_t & dst, const volatile real_t & src) const -#endif - { - // max reduce - if (dst < src) - { - dst = src; - } - } // join - - DataArray2d Udata; }; // ComputeDtFunctor2D @@ -148,36 +107,21 @@ class ComputeDtGravityFunctor2D : public HydroBaseFunctor2D // static method which does it all: create and execute functor static void - apply(HydroParams params, - real_t cfl, - VectorField2d gravity, - DataArray2d Udata, - int nbCells, - real_t & invDt) + apply(HydroParams params, real_t cfl, VectorField2d gravity, DataArray2d Udata, real_t & invDt) { ComputeDtGravityFunctor2D functor(params, cfl, gravity, Udata); - Kokkos::parallel_reduce(nbCells, functor, invDt); + Kokkos::Max reducer(invDt); + Kokkos::parallel_reduce( + "ComputeDtGravityFunctor2D", + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), + functor, + reducer); } - // Tell each thread how to initialize its reduction result. - KOKKOS_INLINE_FUNCTION - void - init(real_t & dst) const - { - // The identity under max is -Inf. - // Kokkos does not come with a portable way to access - // floating-point Inf and NaN. -#ifdef __CUDA_ARCH__ - dst = -CUDART_INF; -#else - dst = std::numeric_limits::min(); -#endif // __CUDA_ARCH__ - } // init - /* this is a reduce (max) functor */ KOKKOS_INLINE_FUNCTION void - operator()(const int & index, real_t & invDt) const + operator()(const int & i, const int & j, real_t & invDt) const { const int isize = params.isize; const int jsize = params.jsize; @@ -185,10 +129,7 @@ class ComputeDtGravityFunctor2D : public HydroBaseFunctor2D // const int nbvar = params.nbvar; const real_t dx = fmin(params.dx, params.dy); - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j < jsize - ghostWidth && i >= ghostWidth && i < isize - ghostWidth) + if (j >= ghostWidth and j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { HydroState uLoc; // conservative variables in current cell @@ -229,27 +170,6 @@ class ComputeDtGravityFunctor2D : public HydroBaseFunctor2D } // operator () - - // "Join" intermediate results from different threads. - // This should normally implement the same reduction - // operation as operator() above. Note that both input - // arguments MUST be declared volatile. - KOKKOS_INLINE_FUNCTION -#if KOKKOS_VERSION_MAJOR > 3 - void - join(real_t & dst, const real_t & src) const -#else - void - join(volatile real_t & dst, const volatile real_t & src) const -#endif - { - // max reduce - if (dst < src) - { - dst = src; - } - } // join - real_t cfl; VectorField2d gravity; DataArray2d Udata; @@ -279,23 +199,22 @@ class ConvertToPrimitivesFunctor2D : public HydroBaseFunctor2D static void apply(HydroParams params, DataArray2d Udata, DataArray2d Qdata) { - int nbCells = params.isize * params.jsize; ConvertToPrimitivesFunctor2D functor(params, Udata, Qdata); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + "ConvertToPrimitivesFunctor2D", + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; // const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= 0 && j < jsize && i >= 0 && i < isize) + if (j >= 0 and j < jsize and i >= 0 and i < isize) { HydroState uLoc; // conservative variables in current cell @@ -360,16 +279,13 @@ class ComputeFluxesAndUpdateFunctor2D : public HydroBaseFunctor2D KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j <= jsize - ghostWidth && i >= ghostWidth && i <= isize - ghostWidth) + if (j >= ghostWidth and j <= jsize - ghostWidth and i >= ghostWidth and i <= isize - ghostWidth) { HydroState qleft, qright; @@ -478,16 +394,13 @@ class ComputeTraceFunctor2D : public HydroBaseFunctor2D KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= 1 && j <= jsize - ghostWidth && i >= 1 && i <= isize - ghostWidth) + if (j >= 1 and j <= jsize - ghostWidth and i >= 1 and i <= isize - ghostWidth) { HydroState qLoc; // local primitive variables @@ -614,24 +527,23 @@ class ComputeAndStoreFluxesFunctor2D : public HydroBaseFunctor2D bool gravity_enabled, VectorField2d gravity) { - int nbCells = params.isize * params.jsize; ComputeAndStoreFluxesFunctor2D functor( params, Qdata, FluxData_x, FluxData_y, dt, gravity_enabled, gravity); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + "ComputeAndStoreFluxesFunctor2D", + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j <= jsize - ghostWidth && i >= ghostWidth && i <= isize - ghostWidth) + if (j >= ghostWidth and j <= jsize - ghostWidth and i >= ghostWidth and i <= isize - ghostWidth) { // local primitive variables @@ -876,23 +788,22 @@ class UpdateFunctor2D : public HydroBaseFunctor2D static void apply(HydroParams params, DataArray2d Udata, DataArray2d FluxData_x, DataArray2d FluxData_y) { - int nbCells = params.isize * params.jsize; UpdateFunctor2D functor(params, Udata, FluxData_x, FluxData_y); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + "UpdateFunctor2D", + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j < jsize - ghostWidth && i >= ghostWidth && i < isize - ghostWidth) + if (j >= ghostWidth and j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { Udata(i, j, ID) += FluxData_x(i, j, ID); @@ -950,23 +861,22 @@ class UpdateDirFunctor2D : public HydroBaseFunctor2D static void apply(HydroParams params, DataArray2d Udata, DataArray2d FluxData) { - int nbCells = params.isize * params.jsize; UpdateDirFunctor2D functor(params, Udata, FluxData); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + "UpdateDirFunctor2D", + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j < jsize - ghostWidth && i >= ghostWidth && i < isize - ghostWidth) + if (j >= ghostWidth and j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { if (dir == XDIR) @@ -1033,23 +943,22 @@ class ComputeSlopesFunctor2D : public HydroBaseFunctor2D static void apply(HydroParams params, DataArray2d Qdata, DataArray2d Slopes_x, DataArray2d Slopes_y) { - int nbCells = params.isize * params.jsize; ComputeSlopesFunctor2D functor(params, Qdata, Slopes_x, Slopes_y); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + "ComputeSlopesFunctor2D", + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth - 1 && j <= jsize - ghostWidth && i >= ghostWidth - 1 && + if (j >= ghostWidth - 1 and j <= jsize - ghostWidth and i >= ghostWidth - 1 and i <= isize - ghostWidth) { @@ -1164,24 +1073,23 @@ class ComputeTraceAndFluxes_Functor2D : public HydroBaseFunctor2D bool gravity_enabled, VectorField2d gravity) { - int nbCells = params.isize * params.jsize; ComputeTraceAndFluxes_Functor2D functor( params, Qdata, Slopes_x, Slopes_y, Fluxes, dt, gravity_enabled, gravity); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + "ComputeTraceAndFluxes_Functor2D", + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j <= jsize - ghostWidth && i >= ghostWidth && i <= isize - ghostWidth) + if (j >= ghostWidth and j <= jsize - ghostWidth and i >= ghostWidth and i <= isize - ghostWidth) { // local primitive variables @@ -1381,23 +1289,22 @@ class GravitySourceTermFunctor2D : public HydroBaseFunctor2D VectorField2d gravity, real_t dt) { - int nbCells = params.isize * params.jsize; GravitySourceTermFunctor2D functor(params, Udata_in, Udata_out, gravity, dt); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + "GravitySourceTermFunctor2D", + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j < jsize - ghostWidth && i >= ghostWidth && i < isize - ghostWidth) + if (j >= ghostWidth and j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { real_t rhoOld = Udata_in(i, j, ID); diff --git a/src/muscl/HydroRunFunctors3D.h b/src/muscl/HydroRunFunctors3D.h index 1fb1cba..5d59a9e 100644 --- a/src/muscl/HydroRunFunctors3D.h +++ b/src/muscl/HydroRunFunctors3D.h @@ -1,11 +1,6 @@ #ifndef HYDRO_RUN_FUNCTORS_3D_H_ #define HYDRO_RUN_FUNCTORS_3D_H_ -#include // for std::numeric_limits -#ifdef __CUDA_ARCH__ -# include -#endif // __CUDA_ARCH__ - #include "shared/kokkos_shared.h" #include "HydroBaseFunctor3D.h" #include "shared/RiemannSolvers.h" @@ -34,31 +29,21 @@ class ComputeDtFunctor3D : public HydroBaseFunctor3D // static method which does it all: create and execute functor static void - apply(HydroParams params, DataArray3d Udata, int nbCells, real_t & invDt) + apply(HydroParams params, DataArray3d Udata, real_t & invDt) { - ComputeDtFunctor3D functor(params, Udata); - Kokkos::parallel_reduce(nbCells, functor, invDt); + ComputeDtFunctor3D functor(params, Udata); + Kokkos::Max reducer(invDt); + Kokkos::parallel_reduce("ComputeDtFunctor3D", + Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor, + reducer); } - // Tell each thread how to initialize its reduction result. - KOKKOS_INLINE_FUNCTION - void - init(real_t & dst) const - { - // The identity under max is -Inf. - // Kokkos does not come with a portable way to access - // floating-point Inf and NaN. -#ifdef __CUDA_ARCH__ - dst = -CUDART_INF; -#else - dst = std::numeric_limits::min(); -#endif // __CUDA_ARCH__ - } // init - /* this is a reduce (max) functor */ KOKKOS_INLINE_FUNCTION void - operator()(const int & index, real_t & invDt) const + operator()(const int & i, const int & j, const int & k, real_t & invDt) const { const int isize = params.isize; const int jsize = params.jsize; @@ -69,11 +54,8 @@ class ComputeDtFunctor3D : public HydroBaseFunctor3D const real_t dy = params.dy; const real_t dz = params.dz; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth && k < ksize - ghostWidth && j >= ghostWidth && j < jsize - ghostWidth && - i >= ghostWidth && i < isize - ghostWidth) + if (k >= ghostWidth and k < ksize - ghostWidth and j >= ghostWidth and + j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { HydroState uLoc; // conservative variables in current cell @@ -100,26 +82,6 @@ class ComputeDtFunctor3D : public HydroBaseFunctor3D } // operator () - // "Join" intermediate results from different threads. - // This should normally implement the same reduction - // operation as operator() above. Note that both input - // arguments MUST be declared volatile. - KOKKOS_INLINE_FUNCTION -#if KOKKOS_VERSION_MAJOR > 3 - void - join(real_t & dst, const real_t & src) const -#else - void - join(volatile real_t & dst, const volatile real_t & src) const -#endif - { - // max reduce - if (dst < src) - { - dst = src; - } - } // join - DataArray3d Udata; }; // ComputeDtFunctor3D @@ -152,36 +114,21 @@ class ComputeDtGravityFunctor3D : public HydroBaseFunctor3D // static method which does it all: create and execute functor static void - apply(HydroParams params, - real_t cfl, - VectorField3d gravity, - DataArray3d Udata, - int nbCells, - real_t & invDt) + apply(HydroParams params, real_t cfl, VectorField3d gravity, DataArray3d Udata, real_t & invDt) { ComputeDtGravityFunctor3D functor(params, cfl, gravity, Udata); - Kokkos::parallel_reduce(nbCells, functor, invDt); + Kokkos::Max reducer(invDt); + Kokkos::parallel_reduce("ComputeDtGravityFunctor3D", + Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor, + reducer); } - // Tell each thread how to initialize its reduction result. - KOKKOS_INLINE_FUNCTION - void - init(real_t & dst) const - { - // The identity under max is -Inf. - // Kokkos does not come with a portable way to access - // floating-point Inf and NaN. -#ifdef __CUDA_ARCH__ - dst = -CUDART_INF; -#else - dst = std::numeric_limits::min(); -#endif // __CUDA_ARCH__ - } // init - /* this is a reduce (max) functor */ KOKKOS_INLINE_FUNCTION void - operator()(const int & index, real_t & invDt) const + operator()(const int & i, const int & j, const int & k, real_t & invDt) const { const int isize = params.isize; const int jsize = params.jsize; @@ -192,11 +139,8 @@ class ComputeDtGravityFunctor3D : public HydroBaseFunctor3D real_t dx = fmin(params.dx, params.dy); dx = fmin(dx, params.dz); - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth && k < ksize - ghostWidth && j >= ghostWidth && j < jsize - ghostWidth && - i >= ghostWidth && i < isize - ghostWidth) + if (k >= ghostWidth and k < ksize - ghostWidth and j >= ghostWidth and + j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { HydroState uLoc; // conservative variables in current cell @@ -240,27 +184,6 @@ class ComputeDtGravityFunctor3D : public HydroBaseFunctor3D } // operator () - - // "Join" intermediate results from different threads. - // This should normally implement the same reduction - // operation as operator() above. Note that both input - // arguments MUST be declared volatile. - KOKKOS_INLINE_FUNCTION -#if KOKKOS_VERSION_MAJOR > 3 - void - join(real_t & dst, const real_t & src) const -#else - void - join(volatile real_t & dst, const volatile real_t & src) const -#endif - { - // max reduce - if (dst < src) - { - dst = src; - } - } // join - real_t cfl; VectorField3d gravity; DataArray3d Udata; @@ -290,24 +213,23 @@ class ConvertToPrimitivesFunctor3D : public HydroBaseFunctor3D static void apply(HydroParams params, DataArray3d Udata, DataArray3d Qdata) { - int nbCells = params.isize * params.jsize * params.ksize; ConvertToPrimitivesFunctor3D functor(params, Udata, Qdata); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for("ConvertToPrimitivesFunctor3D", + Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; // const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= 0 && k < ksize && j >= 0 && j < jsize && i >= 0 && i < isize) + if (k >= 0 and k < ksize and j >= 0 and j < jsize and i >= 0 and i < isize) { HydroState uLoc; // conservative variables in current cell @@ -390,26 +312,25 @@ class ComputeAndStoreFluxesFunctor3D : public HydroBaseFunctor3D bool gravity_enabled, VectorField3d gravity) { - int nbCells = params.isize * params.jsize * params.ksize; ComputeAndStoreFluxesFunctor3D functor( params, Qdata, FluxData_x, FluxData_y, FluxData_z, dt, gravity_enabled, gravity); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for("ComputeAndStoreFluxesFunctor3D", + Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth && k <= ksize - ghostWidth && j >= ghostWidth && j <= jsize - ghostWidth && - i >= ghostWidth && i <= isize - ghostWidth) + if (k >= ghostWidth and k <= ksize - ghostWidth and j >= ghostWidth and + j <= jsize - ghostWidth and i >= ghostWidth and i <= isize - ghostWidth) { // local primitive variables @@ -827,25 +748,24 @@ class UpdateFunctor3D : public HydroBaseFunctor3D DataArray3d FluxData_y, DataArray3d FluxData_z) { - int nbCells = params.isize * params.jsize * params.ksize; UpdateFunctor3D functor(params, Udata, FluxData_x, FluxData_y, FluxData_z); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for("UpdateFunctor3D", + Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth && k < ksize - ghostWidth && j >= ghostWidth && j < jsize - ghostWidth && - i >= ghostWidth && i < isize - ghostWidth) + if (k >= ghostWidth and k < ksize - ghostWidth and j >= ghostWidth and + j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { Udata(i, j, k, ID) += FluxData_x(i, j, k, ID); @@ -920,25 +840,24 @@ class UpdateDirFunctor3D : public HydroBaseFunctor3D static void apply(HydroParams params, DataArray3d Udata, DataArray3d FluxData) { - int nbCells = params.isize * params.jsize * params.ksize; UpdateDirFunctor3D functor(params, Udata, FluxData); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for("UpdateDirFunctor3D", + Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth && k < ksize - ghostWidth && j >= ghostWidth && j < jsize - ghostWidth && - i >= ghostWidth && i < isize - ghostWidth) + if (k >= ghostWidth and k < ksize - ghostWidth and j >= ghostWidth and + j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { if (dir == XDIR) @@ -1031,25 +950,24 @@ class ComputeSlopesFunctor3D : public HydroBaseFunctor3D DataArray3d Slopes_y, DataArray3d Slopes_z) { - int nbCells = params.isize * params.jsize * params.ksize; ComputeSlopesFunctor3D functor(params, Qdata, Slopes_x, Slopes_y, Slopes_z); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for("ComputeSlopesFunctor3D", + Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth - 1 && k <= ksize - ghostWidth && j >= ghostWidth - 1 && - j <= jsize - ghostWidth && i >= ghostWidth - 1 && i <= isize - ghostWidth) + if (k >= ghostWidth - 1 and k <= ksize - ghostWidth and j >= ghostWidth - 1 and + j <= jsize - ghostWidth and i >= ghostWidth - 1 and i <= isize - ghostWidth) { // local primitive variables @@ -1203,26 +1121,25 @@ class ComputeTraceAndFluxes_Functor3D : public HydroBaseFunctor3D bool gravity_enabled, VectorField3d gravity) { - int nbCells = params.isize * params.jsize * params.ksize; ComputeTraceAndFluxes_Functor3D functor( params, Qdata, Slopes_x, Slopes_y, Slopes_z, Fluxes, dt, gravity_enabled, gravity); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for("ComputeTraceAndFluxes_Functor3D", + Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth && k <= ksize - ghostWidth && j >= ghostWidth && j <= jsize - ghostWidth && - i >= ghostWidth && i <= isize - ghostWidth) + if (k >= ghostWidth and k <= ksize - ghostWidth and j >= ghostWidth and + j <= jsize - ghostWidth and i >= ghostWidth and i <= isize - ghostWidth) { // local primitive variables @@ -1536,25 +1453,24 @@ class GravitySourceTermFunctor3D : public HydroBaseFunctor3D VectorField3d gravity, real_t dt) { - int nbCells = params.isize * params.jsize * params.ksize; GravitySourceTermFunctor3D functor(params, Udata_in, Udata_out, gravity, dt); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for("GravitySourceTermFunctor3D", + Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth && k < ksize - ghostWidth && j >= ghostWidth && j < jsize - ghostWidth && - i >= ghostWidth && i < isize - ghostWidth) + if (k >= ghostWidth and k < ksize - ghostWidth and j >= ghostWidth and + j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { real_t rhoOld = Udata_in(i, j, k, ID); diff --git a/src/muscl/MHDRunFunctors2D.h b/src/muscl/MHDRunFunctors2D.h index f73d59c..5d5a3c4 100644 --- a/src/muscl/MHDRunFunctors2D.h +++ b/src/muscl/MHDRunFunctors2D.h @@ -1,11 +1,6 @@ #ifndef MHD_RUN_FUNCTORS_2D_H_ #define MHD_RUN_FUNCTORS_2D_H_ -#include // for std::numeric_limits -#ifdef __CUDA_ARCH__ -# include -#endif // __CUDA_ARCH__ - #include "shared/kokkos_shared.h" #include "MHDBaseFunctor2D.h" #include "shared/RiemannSolvers_MHD.h" @@ -32,31 +27,21 @@ class ComputeDtFunctor2D_MHD : public MHDBaseFunctor2D // static method which does it all: create and execute functor static void - apply(HydroParams params, DataArray2d Udata, int nbCells, real_t & invDt) + apply(HydroParams params, DataArray2d Udata, real_t & invDt) { ComputeDtFunctor2D_MHD functor(params, Udata); - Kokkos::parallel_reduce(nbCells, functor, invDt); + Kokkos::Max reducer(invDt); + Kokkos::parallel_reduce( + "ComputeDtFunctor2D_MHD", + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), + functor, + reducer); } - // Tell each thread how to initialize its reduction result. - KOKKOS_INLINE_FUNCTION - void - init(real_t & dst) const - { - // The identity under max is -Inf. - // Kokkos does not come with a portable way to access - // floating-point Inf and NaN. -#ifdef __CUDA_ARCH__ - dst = -CUDART_INF; -#else - dst = std::numeric_limits::min(); -#endif // __CUDA_ARCH__ - } // init - /* this is a reduce (max) functor */ KOKKOS_INLINE_FUNCTION void - operator()(const int & index, real_t & invDt) const + operator()(const int & i, const int & j, real_t & invDt) const { const int isize = params.isize; const int jsize = params.jsize; @@ -64,10 +49,7 @@ class ComputeDtFunctor2D_MHD : public MHDBaseFunctor2D const real_t dx = params.dx; const real_t dy = params.dy; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j < jsize - ghostWidth && i >= ghostWidth && i < isize - ghostWidth) + if (j >= ghostWidth and j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { MHDState qLoc; // primitive variables in current cell @@ -94,27 +76,6 @@ class ComputeDtFunctor2D_MHD : public MHDBaseFunctor2D } // operator () - - // "Join" intermediate results from different threads. - // This should normally implement the same reduction - // operation as operator() above. Note that both input - // arguments MUST be declared volatile. - KOKKOS_INLINE_FUNCTION -#if KOKKOS_VERSION_MAJOR > 3 - void - join(real_t & dst, const real_t & src) const -#else - void - join(volatile real_t & dst, const volatile real_t & src) const -#endif - { - // max reduce - if (dst < src) - { - dst = src; - } - } // join - DataArray2d Qdata; }; // ComputeDtFunctor2D_MHD @@ -133,27 +94,27 @@ class ConvertToPrimitivesFunctor2D_MHD : public MHDBaseFunctor2D // static method which does it all: create and execute functor static void - apply(HydroParams params, DataArray2d Udata, DataArray2d Qdata, int nbCells) + apply(HydroParams params, DataArray2d Udata, DataArray2d Qdata) { ConvertToPrimitivesFunctor2D_MHD functor(params, Udata, Qdata); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + "ConvertToPrimitivesFunctor2D_MHD", + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; // const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - // magnetic field in neighbor cells real_t magFieldNeighbors[3]; - if (j >= 0 && j < jsize - 1 && i >= 0 && i < isize - 1) + if (j >= 0 and j < jsize - 1 and i >= 0 and i < isize - 1) { MHDState uLoc; // conservative variables in current cell @@ -231,26 +192,23 @@ class ComputeFluxesAndStoreFunctor2D_MHD : public MHDBaseFunctor2D DataArray2d Flux_x, DataArray2d Flux_y, real_t dtdx, - real_t dtdy, - int nbCells) + real_t dtdy) { ComputeFluxesAndStoreFunctor2D_MHD functor( params, Qm_x, Qm_y, Qp_x, Qp_y, Flux_x, Flux_y, dtdx, dtdy); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j < jsize - ghostWidth + 1 && i >= ghostWidth && + if (j >= ghostWidth and j < jsize - ghostWidth + 1 and i >= ghostWidth and i < isize - ghostWidth + 1) { @@ -327,26 +285,23 @@ class ComputeEmfAndStoreFunctor2D : public MHDBaseFunctor2D DataArray2d QEdge_LB, DataArrayScalar Emf, real_t dtdx, - real_t dtdy, - int nbCells) + real_t dtdy) { ComputeEmfAndStoreFunctor2D functor( params, QEdge_RT, QEdge_RB, QEdge_LT, QEdge_LB, Emf, dtdx, dtdy); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j < jsize - ghostWidth + 1 && i >= ghostWidth && + if (j >= ghostWidth and j < jsize - ghostWidth + 1 and i >= ghostWidth and i < isize - ghostWidth + 1) { @@ -424,8 +379,7 @@ class ComputeTraceFunctor2D_MHD : public MHDBaseFunctor2D DataArray2d QEdge_LT, DataArray2d QEdge_LB, real_t dtdx, - real_t dtdy, - int nbCells) + real_t dtdy) { ComputeTraceFunctor2D_MHD functor(params, Udata, @@ -440,21 +394,19 @@ class ComputeTraceFunctor2D_MHD : public MHDBaseFunctor2D QEdge_LB, dtdx, dtdy); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth - 2 && j < jsize - ghostWidth + 1 && i >= ghostWidth - 2 && + if (j >= ghostWidth - 2 and j < jsize - ghostWidth + 1 and i >= ghostWidth - 2 and i < isize - ghostWidth + 1) { @@ -537,25 +489,22 @@ class UpdateFunctor2D_MHD : public MHDBaseFunctor2D DataArray2d FluxData_x, DataArray2d FluxData_y, real_t dtdx, - real_t dtdy, - int nbCells) + real_t dtdy) { UpdateFunctor2D_MHD functor(params, Udata, FluxData_x, FluxData_y, dtdx, dtdy); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j < jsize - ghostWidth && i >= ghostWidth && i < isize - ghostWidth) + if (j >= ghostWidth and j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { MHDState udata; @@ -638,29 +587,22 @@ class UpdateEmfFunctor2D : public MHDBaseFunctor2D // static method which does it all: create and execute functor static void - apply(HydroParams params, - DataArray2d Udata, - DataArrayScalar Emf, - real_t dtdx, - real_t dtdy, - int nbCells) + apply(HydroParams params, DataArray2d Udata, DataArrayScalar Emf, real_t dtdx, real_t dtdy) { UpdateEmfFunctor2D functor(params, Udata, Emf, dtdx, dtdy); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for( + Kokkos::MDRangePolicy>({ 0, 0 }, { params.isize, params.jsize }), functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j < jsize - ghostWidth /*+1*/ && i >= ghostWidth && + if (j >= ghostWidth and j < jsize - ghostWidth /*+1*/ and i >= ghostWidth and i < isize - ghostWidth /*+1*/) { @@ -705,16 +647,13 @@ class ComputeTraceAndFluxes_Functor2D_MHD : public MHDBaseFunctor2D KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j) const { const int isize = params.isize; const int jsize = params.jsize; const int ghostWidth = params.ghostWidth; - int i, j; - index2coord(index, i, j, isize, jsize); - - if (j >= ghostWidth && j <= jsize - ghostWidth && i >= ghostWidth && i <= isize - ghostWidth) + if (j >= ghostWidth and j <= jsize - ghostWidth and i >= ghostWidth and i <= isize - ghostWidth) { // local primitive variables diff --git a/src/muscl/MHDRunFunctors3D.h b/src/muscl/MHDRunFunctors3D.h index a6ba201..97d0edd 100644 --- a/src/muscl/MHDRunFunctors3D.h +++ b/src/muscl/MHDRunFunctors3D.h @@ -1,11 +1,6 @@ #ifndef MHD_RUN_FUNCTORS_3D_H_ #define MHD_RUN_FUNCTORS_3D_H_ -#include // for std::numeric_limits -#ifdef __CUDA_ARCH__ -# include -#endif // __CUDA_ARCH__ - #include "shared/kokkos_shared.h" #include "MHDBaseFunctor3D.h" #include "shared/RiemannSolvers_MHD.h" @@ -32,31 +27,20 @@ class ComputeDtFunctor3D_MHD : public MHDBaseFunctor3D // static method which does it all: create and execute functor static void - apply(HydroParams params, DataArray3d Udata, int nbCells, real_t & invDt) + apply(HydroParams params, DataArray3d Udata, real_t & invDt) { ComputeDtFunctor3D_MHD functor(params, Udata); - Kokkos::parallel_reduce(nbCells, functor, invDt); + Kokkos::Max reducer(invDt); + Kokkos::parallel_reduce(Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor, + reducer); } - // Tell each thread how to initialize its reduction result. - KOKKOS_INLINE_FUNCTION - void - init(real_t & dst) const - { - // The identity under max is -Inf. - // Kokkos does not come with a portable way to access - // floating-point Inf and NaN. -#ifdef __CUDA_ARCH__ - dst = -CUDART_INF; -#else - dst = std::numeric_limits::min(); -#endif // __CUDA_ARCH__ - } // init - /* this is a reduce (max) functor */ KOKKOS_INLINE_FUNCTION void - operator()(const int & index, real_t & invDt) const + operator()(const int & i, const int & j, const int & k, real_t & invDt) const { const int isize = params.isize; const int jsize = params.jsize; @@ -66,11 +50,8 @@ class ComputeDtFunctor3D_MHD : public MHDBaseFunctor3D const real_t dy = params.dy; const real_t dz = params.dz; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth && k < ksize - ghostWidth && j >= ghostWidth && j < jsize - ghostWidth && - i >= ghostWidth && i < isize - ghostWidth) + if (k >= ghostWidth and k < ksize - ghostWidth and j >= ghostWidth and + j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { MHDState qLoc; // primitive variables in current cell @@ -98,27 +79,6 @@ class ComputeDtFunctor3D_MHD : public MHDBaseFunctor3D } // operator () - - // "Join" intermediate results from different threads. - // This should normally implement the same reduction - // operation as operator() above. Note that both input - // arguments MUST be declared volatile. - KOKKOS_INLINE_FUNCTION -#if KOKKOS_VERSION_MAJOR > 3 - void - join(real_t & dst, const real_t & src) const -#else - void - join(volatile real_t & dst, const volatile real_t & src) const -#endif - { - // max reduce - if (dst < src) - { - dst = src; - } - } // join - DataArray3d Qdata; }; // ComputeDtFunctor3D_MHD @@ -137,28 +97,27 @@ class ConvertToPrimitivesFunctor3D_MHD : public MHDBaseFunctor3D // static method which does it all: create and execute functor static void - apply(HydroParams params, DataArray3d Udata, DataArray3d Qdata, int nbCells) + apply(HydroParams params, DataArray3d Udata, DataArray3d Qdata) { ConvertToPrimitivesFunctor3D_MHD functor(params, Udata, Qdata); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for(Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; // const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - // magnetic field in neighbor cells real_t magFieldNeighbors[3]; - if (k >= 0 && k < ksize - 1 && j >= 0 && j < jsize - 1 && i >= 0 && i < isize - 1) + if (k >= 0 and k < ksize - 1 and j >= 0 and j < jsize - 1 and i >= 0 and i < isize - 1) { MHDState uLoc; // conservative variables in current cell @@ -218,29 +177,24 @@ class ComputeElecFieldFunctor3D : public MHDBaseFunctor3D // static method which does it all: create and execute functor static void - apply(HydroParams params, - DataArray3d Udata, - DataArray3d Qdata, - DataArrayVector3 ElecField, - int nbCells) + apply(HydroParams params, DataArray3d Udata, DataArray3d Qdata, DataArrayVector3 ElecField) { ComputeElecFieldFunctor3D functor(params, Udata, Qdata, ElecField); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for(Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; // const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k > 0 && k < ksize - 1 && j > 0 && j < jsize - 1 && i > 0 && i < isize - 1) + if (k > 0 and k < ksize - 1 and j > 0 and j < jsize - 1 and i > 0 and i < isize - 1) { real_t u, v, w, A, B, C; @@ -316,26 +270,24 @@ class ComputeMagSlopesFunctor3D : public MHDBaseFunctor3D DataArray3d Udata, DataArrayVector3 DeltaA, DataArrayVector3 DeltaB, - DataArrayVector3 DeltaC, - int nbCells) + DataArrayVector3 DeltaC) { ComputeMagSlopesFunctor3D functor(params, Udata, DeltaA, DeltaB, DeltaC); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for(Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; // const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k > 0 && k < ksize - 1 && j > 0 && j < jsize - 1 && i > 0 && i < isize - 1) + if (k > 0 and k < ksize - 1 and j > 0 and j < jsize - 1 and i > 0 and i < isize - 1) { real_t bfSlopes[15]; @@ -482,8 +434,7 @@ class ComputeTraceFunctor3D_MHD : public MHDBaseFunctor3D DataArray3d QEdge_LB3, real_t dtdx, real_t dtdy, - real_t dtdz, - int nbCells) + real_t dtdz) { ComputeTraceFunctor3D_MHD functor(params, Udata, @@ -513,23 +464,22 @@ class ComputeTraceFunctor3D_MHD : public MHDBaseFunctor3D dtdx, dtdy, dtdz); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for(Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth - 2 && k < ksize - ghostWidth + 1 && j >= ghostWidth - 2 && - j < jsize - ghostWidth + 1 && i >= ghostWidth - 2 && i < isize - ghostWidth + 1) + if (k >= ghostWidth - 2 and k < ksize - ghostWidth + 1 and j >= ghostWidth - 2 and + j < jsize - ghostWidth + 1 and i >= ghostWidth - 2 and i < isize - ghostWidth + 1) { MHDState q; @@ -749,28 +699,26 @@ class ComputeFluxesAndStoreFunctor3D_MHD : public MHDBaseFunctor3D DataArray3d Flux_z, real_t dtdx, real_t dtdy, - real_t dtdz, - int nbCells) + real_t dtdz) { ComputeFluxesAndStoreFunctor3D_MHD functor( params, Qm_x, Qm_y, Qm_z, Qp_x, Qp_y, Qp_z, Flux_x, Flux_y, Flux_z, dtdx, dtdy, dtdz); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for(Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth && k < ksize - ghostWidth + 1 && j >= ghostWidth && - j < jsize - ghostWidth + 1 && i >= ghostWidth && i < isize - ghostWidth + 1) + if (k >= ghostWidth and k < ksize - ghostWidth + 1 and j >= ghostWidth and + j < jsize - ghostWidth + 1 and i >= ghostWidth and i < isize - ghostWidth + 1) { MHDState qleft, qright; @@ -891,8 +839,7 @@ class ComputeEmfAndStoreFunctor3D : public MHDBaseFunctor3D DataArrayVector3 Emf, real_t dtdx, real_t dtdy, - real_t dtdz, - int nbCells) + real_t dtdz) { ComputeEmfAndStoreFunctor3D functor(params, QEdge_RT, @@ -911,23 +858,22 @@ class ComputeEmfAndStoreFunctor3D : public MHDBaseFunctor3D dtdx, dtdy, dtdz); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for(Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth && k < ksize - ghostWidth + 1 && j >= ghostWidth && - j < jsize - ghostWidth + 1 && i >= ghostWidth && i < isize - ghostWidth + 1) + if (k >= ghostWidth and k < ksize - ghostWidth + 1 and j >= ghostWidth and + j < jsize - ghostWidth + 1 and i >= ghostWidth and i < isize - ghostWidth + 1) { MHDState qEdge_emf[4]; @@ -1007,28 +953,26 @@ class UpdateFunctor3D_MHD : public MHDBaseFunctor3D DataArray3d FluxData_z, real_t dtdx, real_t dtdy, - real_t dtdz, - int nbCells) + real_t dtdz) { UpdateFunctor3D_MHD functor( params, Udata, FluxData_x, FluxData_y, FluxData_z, dtdx, dtdy, dtdz); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for(Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth && k < ksize - ghostWidth && j >= ghostWidth && j < jsize - ghostWidth && - i >= ghostWidth && i < isize - ghostWidth) + if (k >= ghostWidth and k < ksize - ghostWidth and j >= ghostWidth and + j < jsize - ghostWidth and i >= ghostWidth and i < isize - ghostWidth) { MHDState udata; @@ -1119,27 +1063,25 @@ class UpdateEmfFunctor3D : public MHDBaseFunctor3D DataArrayVector3 Emf, real_t dtdx, real_t dtdy, - real_t dtdz, - int nbCells) + real_t dtdz) { UpdateEmfFunctor3D functor(params, Udata, Emf, dtdx, dtdy, dtdz); - Kokkos::parallel_for(nbCells, functor); + Kokkos::parallel_for(Kokkos::MDRangePolicy>( + { 0, 0, 0 }, { params.isize, params.jsize, params.ksize }), + functor); } KOKKOS_INLINE_FUNCTION void - operator()(const int & index) const + operator()(const int & i, const int & j, const int & k) const { const int isize = params.isize; const int jsize = params.jsize; const int ksize = params.ksize; const int ghostWidth = params.ghostWidth; - int i, j, k; - index2coord(index, i, j, k, isize, jsize, ksize); - - if (k >= ghostWidth && k < ksize - ghostWidth + 1 && j >= ghostWidth && - j < jsize - ghostWidth + 1 && i >= ghostWidth && i < isize - ghostWidth + 1) + if (k >= ghostWidth and k < ksize - ghostWidth + 1 and j >= ghostWidth and + j < jsize - ghostWidth + 1 and i >= ghostWidth and i < isize - ghostWidth + 1) { MHDState udata; diff --git a/src/muscl/SolverHydroMuscl.h b/src/muscl/SolverHydroMuscl.h index 5fa19ee..d1d6c2a 100644 --- a/src/muscl/SolverHydroMuscl.h +++ b/src/muscl/SolverHydroMuscl.h @@ -678,7 +678,7 @@ SolverHydroMuscl::compute_dt_local() conditional::type; // call device functor - ComputeDtFunctor::apply(params, params.settings.cfl, gravity, Udata, nbCells, invDt); + ComputeDtFunctor::apply(params, params.settings.cfl, gravity, Udata, invDt); } else { @@ -690,7 +690,7 @@ SolverHydroMuscl::compute_dt_local() typename std::conditional::type; // call device functor - ComputeDtFunctor::apply(params, Udata, nbCells, invDt); + ComputeDtFunctor::apply(params, Udata, invDt); } dt = params.settings.cfl / invDt; diff --git a/src/muscl/SolverMHDMuscl.cpp b/src/muscl/SolverMHDMuscl.cpp index 08af7ac..15fa12d 100644 --- a/src/muscl/SolverMHDMuscl.cpp +++ b/src/muscl/SolverMHDMuscl.cpp @@ -74,7 +74,7 @@ SolverMHDMuscl<3>::computeElectricField(DataArray Udata) { // call device functor - ComputeElecFieldFunctor3D::apply(params, Udata, Q, ElecField, nbCells); + ComputeElecFieldFunctor3D::apply(params, Udata, Q, ElecField); } // SolverMHDMuscl<3>::computeElectricField @@ -90,7 +90,7 @@ SolverMHDMuscl<3>::computeMagSlopes(DataArray Udata) { // call device functor - ComputeMagSlopesFunctor3D::apply(params, Udata, DeltaA, DeltaB, DeltaC, nbCells); + ComputeMagSlopesFunctor3D::apply(params, Udata, DeltaA, DeltaB, DeltaC); } // SolverMHDMuscl3D::computeMagSlopes @@ -113,20 +113,8 @@ SolverMHDMuscl<2>::computeTrace(DataArray Udata, real_t dt) dtdy = dt / params.dy; // call device functor - ComputeTraceFunctor2D_MHD::apply(params, - Udata, - Q, - Qm_x, - Qm_y, - Qp_x, - Qp_y, - QEdge_RT, - QEdge_RB, - QEdge_LT, - QEdge_LB, - dtdx, - dtdy, - nbCells); + ComputeTraceFunctor2D_MHD::apply( + params, Udata, Q, Qm_x, Qm_y, Qp_x, Qp_y, QEdge_RT, QEdge_RB, QEdge_LT, QEdge_LB, dtdx, dtdy); } // SolverMHDMuscl<2>::computeTrace @@ -178,8 +166,7 @@ SolverMHDMuscl<3>::computeTrace(DataArray Udata, real_t dt) QEdge_LB3, dtdx, dtdy, - dtdz, - nbCells); + dtdz); } // SolverMHDMuscl<3>::computeTrace @@ -198,7 +185,7 @@ SolverMHDMuscl<2>::computeFluxesAndStore(real_t dt) // call device functor ComputeFluxesAndStoreFunctor2D_MHD::apply( - params, Qm_x, Qm_y, Qp_x, Qp_y, Fluxes_x, Fluxes_y, dtdx, dtdy, nbCells); + params, Qm_x, Qm_y, Qp_x, Qp_y, Fluxes_x, Fluxes_y, dtdx, dtdy); } // SolverMHDMuscl<2>::computeFluxesAndStore @@ -217,20 +204,8 @@ SolverMHDMuscl<3>::computeFluxesAndStore(real_t dt) real_t dtdz = dt / params.dz; // call device functor - ComputeFluxesAndStoreFunctor3D_MHD::apply(params, - Qm_x, - Qm_y, - Qm_z, - Qp_x, - Qp_y, - Qp_z, - Fluxes_x, - Fluxes_y, - Fluxes_z, - dtdx, - dtdy, - dtdz, - nbCells); + ComputeFluxesAndStoreFunctor3D_MHD::apply( + params, Qm_x, Qm_y, Qm_z, Qp_x, Qp_y, Qp_z, Fluxes_x, Fluxes_y, Fluxes_z, dtdx, dtdy, dtdz); } // SolverMHDMuscl<3>::computeFluxesAndStore @@ -249,7 +224,7 @@ SolverMHDMuscl<2>::computeEmfAndStore(real_t dt) // call device functor ComputeEmfAndStoreFunctor2D::apply( - params, QEdge_RT, QEdge_RB, QEdge_LT, QEdge_LB, Emf1, dtdx, dtdy, nbCells); + params, QEdge_RT, QEdge_RB, QEdge_LT, QEdge_LB, Emf1, dtdx, dtdy); } // SolverMHSMuscl<2>::computeEmfAndStore @@ -284,8 +259,7 @@ SolverMHDMuscl<3>::computeEmfAndStore(real_t dt) Emf, dtdx, dtdy, - dtdz, - nbCells); + dtdz); } // SolverMHDMuscl<3>::computeEmfAndStore @@ -333,10 +307,10 @@ SolverMHDMuscl<2>::godunov_unsplit_impl(DataArray data_in, DataArray data_out, r computeEmfAndStore(dt); // actual update with fluxes - UpdateFunctor2D_MHD::apply(params, data_out, Fluxes_x, Fluxes_y, dtdx, dtdy, nbCells); + UpdateFunctor2D_MHD::apply(params, data_out, Fluxes_x, Fluxes_y, dtdx, dtdy); // actual update with emf - UpdateEmfFunctor2D::apply(params, data_out, Emf1, dtdx, dtdy, nbCells); + UpdateEmfFunctor2D::apply(params, data_out, Emf1, dtdx, dtdy); } timers[TIMER_NUM_SCHEME]->stop(); @@ -394,11 +368,10 @@ SolverMHDMuscl<3>::godunov_unsplit_impl(DataArray data_in, DataArray data_out, r computeEmfAndStore(dt); // actual update with fluxes - UpdateFunctor3D_MHD::apply( - params, data_out, Fluxes_x, Fluxes_y, Fluxes_z, dtdx, dtdy, dtdz, nbCells); + UpdateFunctor3D_MHD::apply(params, data_out, Fluxes_x, Fluxes_y, Fluxes_z, dtdx, dtdy, dtdz); // actual update with emf - UpdateEmfFunctor3D::apply(params, data_out, Emf, dtdx, dtdy, dtdz, nbCells); + UpdateEmfFunctor3D::apply(params, data_out, Emf, dtdx, dtdy, dtdz); } timers[TIMER_NUM_SCHEME]->stop(); diff --git a/src/muscl/SolverMHDMuscl.h b/src/muscl/SolverMHDMuscl.h index 3c30008..b98aa81 100644 --- a/src/muscl/SolverMHDMuscl.h +++ b/src/muscl/SolverMHDMuscl.h @@ -694,7 +694,7 @@ SolverMHDMuscl::compute_dt_local() typename std::conditional::type; // call device functor - ComputeDtFunctor::apply(params, Udata, nbCells, invDt); + ComputeDtFunctor::apply(params, Udata, invDt); dt = params.settings.cfl / invDt; @@ -811,7 +811,7 @@ SolverMHDMuscl::convertToPrimitives(DataArray Udata) conditional::type; // call device functor - ConvertToPrimitivesFunctor::apply(params, Udata, Q, nbCells); + ConvertToPrimitivesFunctor::apply(params, Udata, Q); } // SolverMHDMuscl::convertToPrimitives diff --git a/src/shared/kokkos_shared.h b/src/shared/kokkos_shared.h index 109e9ad..d9ea345 100644 --- a/src/shared/kokkos_shared.h +++ b/src/shared/kokkos_shared.h @@ -56,7 +56,7 @@ using VectorField3dHost = DataArrayVector3::HostMirror; KOKKOS_INLINE_FUNCTION void -index2coord(int index, int & i, int & j, int Nx, int Ny) +index2coord(int64_t index, int & i, int & j, int Nx, int Ny) { UNUSED(Nx); UNUSED(Ny); @@ -71,23 +71,24 @@ index2coord(int index, int & i, int & j, int Nx, int Ny) } KOKKOS_INLINE_FUNCTION -int +int64_t coord2index(int i, int j, int Nx, int Ny) { UNUSED(Nx); UNUSED(Ny); #ifdef KOKKOS_ENABLE_CUDA - return i + Nx * j; // left layout + int64_t res = i + Nx * j; // left layout #else - return j + Ny * i; // right layout + int64_t res = j + Ny * i; // right layout #endif + return res; } /* 3D */ KOKKOS_INLINE_FUNCTION void -index2coord(int index, int & i, int & j, int & k, int Nx, int Ny, int Nz) +index2coord(int64_t index, int & i, int & j, int & k, int Nx, int Ny, int Nz) { UNUSED(Nx); UNUSED(Nz); @@ -97,7 +98,7 @@ index2coord(int index, int & i, int & j, int & k, int Nx, int Ny, int Nz) j = (index - k * NxNy) / Nx; i = index - j * Nx - k * NxNy; #else - int NyNz = Ny * Nz; + int NyNz = Ny * Nz; i = index / NyNz; j = (index - i * NyNz) / Nz; k = index - j * Nz - i * NyNz; @@ -111,10 +112,11 @@ coord2index(int i, int j, int k, int Nx, int Ny, int Nz) UNUSED(Nx); UNUSED(Nz); #ifdef KOKKOS_ENABLE_CUDA - return i + Nx * j + Nx * Ny * k; // left layout + int64_t res = i + Nx * j + Nx * Ny * k; // left layout #else - return k + Nz * j + Nz * Ny * i; // right layout + int64_t res = k + Nz * j + Nz * Ny * i; // right layout #endif + return res; } } // namespace ppkMHD